Exclude URLs from text analysis (#11759)

The added regex removes URLs from the toot's body, including ones without
http or even www (like mysite.com), so that only the real text of the toot
is analyzed for RTL detection.
This commit is contained in:
Mostafa Ahangarha 2019-09-05 01:00:49 +04:30 committed by Eugen Rochko
parent 4f9f1c6a98
commit 6899564a76

View file

@ -20,6 +20,7 @@ export function isRtl(text) {
text = text.replace(/(?:^|[^\/\w])@([a-z0-9_]+(@[a-z0-9\.\-]+)?)/ig, ''); text = text.replace(/(?:^|[^\/\w])@([a-z0-9_]+(@[a-z0-9\.\-]+)?)/ig, '');
text = text.replace(/(?:^|[^\/\w])#([\S]+)/ig, ''); text = text.replace(/(?:^|[^\/\w])#([\S]+)/ig, '');
text = text.replace(/\s+/g, ''); text = text.replace(/\s+/g, '');
text = text.replace(/(\w\S+\.\w{2,}\S*)/g, '');
const matches = text.match(rtlChars); const matches = text.match(rtlChars);