From 23bf01bcd6ff54e0f3d2a4a5f0c049d0a187a5e9 Mon Sep 17 00:00:00 2001 From: masarakki Date: Sat, 6 May 2017 02:48:22 +0900 Subject: [PATCH] use Twitter::Extractor for creating links (#2502) --- app/lib/extractor.rb | 33 +++++++++++++ app/lib/formatter.rb | 97 +++++++++++++++++++++---------------- spec/lib/formatter_spec.rb | 16 ++++++ spec/models/account_spec.rb | 4 ++ 4 files changed, 107 insertions(+), 43 deletions(-) create mode 100644 app/lib/extractor.rb diff --git a/app/lib/extractor.rb b/app/lib/extractor.rb new file mode 100644 index 0000000000..3d88b01cd2 --- /dev/null +++ b/app/lib/extractor.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Extractor + extend Twitter::Extractor + + module_function + + def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end + return [] unless text =~ Twitter::Regex[:at_signs] + + possible_entries = [] + + text.to_s.scan(Account::MENTION_RE) do |screen_name, _| + match_data = $LAST_MATCH_INFO + after = $' + unless after =~ Twitter::Regex[:end_mention_match] + start_position = match_data.char_begin(1) - 1 + end_position = match_data.char_end(1) + possible_entries << { + screen_name: screen_name, + indices: [start_position, end_position], + } + end + end + + if block_given? + possible_entries.each do |mention| + yield mention[:screen_name], mention[:indices].first, mention[:indices].last + end + end + possible_entries + end +end diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index 6d0828a8d7..12b030e117 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -13,11 +13,10 @@ class Formatter return reformat(status.content) unless status.local? html = status.text - html = encode_and_link_urls(html) + html = encode_and_link_urls(html, status.mentions) + html = simple_format(html, {}, sanitize: false) html = html.delete("\n") - html = link_mentions(html, status.mentions) - html = link_hashtags(html) html.html_safe # rubocop:disable Rails/OutputSafety end @@ -37,8 +36,6 @@ class Formatter html = encode_and_link_urls(account.note) html = simple_format(html, {}, sanitize: false) html = html.delete("\n") - html = link_accounts(html) - html = link_hashtags(html) html.html_safe # rubocop:disable Rails/OutputSafety end @@ -53,51 +50,66 @@ class Formatter HTMLEntities.new.encode(html) end - def encode_and_link_urls(html) - entities = Twitter::Extractor.extract_urls_with_indices(html, extract_url_without_protocol: false) - entities = entities.sort_by { |entity| entity[:indices].first } + def encode_and_link_urls(html, mentions = nil) + entities = Extractor.extract_entities_with_indices(html, extract_url_without_protocol: false) + + rewrite(html.dup, entities) do |entity| + if entity[:url] + link_to_url(entity) + elsif entity[:hashtag] + link_to_hashtag(entity) + elsif entity[:screen_name] + link_to_mention(entity, mentions) + end + end + end - chars = html.to_s.to_char_a - html_attrs = { - target: '_blank', - rel: 'nofollow noopener', - } - result = '' + def rewrite(text, entities) + chars = text.to_s.to_char_a + # sort by start index + entities = entities.sort_by do |entity| + indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices] + indices.first + end + + result = [] last_index = entities.reduce(0) do |index, entity| - normalized_url = Addressable::URI.parse(entity[:url]).normalize - indices = entity[:indices] - result += encode(chars[index...indices.first].join) - result += Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), normalized_url, html_attrs) + indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices] + result << encode(chars[index...indices.first].join) + result << yield(entity) indices.last end - result += encode(chars[last_index..-1].join) - end + result << encode(chars[last_index..-1].join) - def link_mentions(html, mentions) - html.gsub(Account::MENTION_RE) do |match| - acct = Account::MENTION_RE.match(match)[1] - mention = mentions.find { |item| TagManager.instance.same_acct?(item.account.acct, acct) } + result.flatten.join + end - mention.nil? ? match : mention_html(match, mention.account) - end + def link_to_url(entity) + normalized_url = Addressable::URI.parse(entity[:url]).normalize + html_attrs = { + target: '_blank', + rel: 'nofollow noopener', + } + Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), normalized_url, html_attrs) end - def link_accounts(html) - html.gsub(Account::MENTION_RE) do |match| - acct = Account::MENTION_RE.match(match)[1] - username, domain = acct.split('@') - domain = nil if TagManager.instance.local_domain?(domain) - account = Account.find_remote(username, domain) + def link_to_mention(entity, mentions) + acct = entity[:screen_name] + return link_to_account(acct) unless mentions + mention = mentions.find { |item| TagManager.instance.same_acct?(item.account.acct, acct) } + mention ? mention_html(mention.account) : "@#{acct}" + end - account.nil? ? match : mention_html(match, account) - end + def link_to_account(acct) + username, domain = acct.split('@') + domain = nil if TagManager.instance.local_domain?(domain) + account = Account.find_remote(username, domain) + account ? mention_html(account) : "@#{acct}" end - def link_hashtags(html) - html.gsub(Tag::HASHTAG_RE) do |match| - hashtag_html(match) - end + def link_to_hashtag(entity) + hashtag_html(entity[:hashtag]) end def link_html(url) @@ -110,12 +122,11 @@ class Formatter "#{prefix}#{text}#{suffix}" end - def hashtag_html(match) - prefix, _, affix = match.rpartition('#') - "#{prefix}##{affix}" + def hashtag_html(tag) + "##{tag}" end - def mention_html(match, account) - "#{match.split('@').first}@#{account.username}" + def mention_html(account) + "@#{account.username}" end end diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb index b762907b2d..81eaf00e80 100644 --- a/spec/lib/formatter_spec.rb +++ b/spec/lib/formatter_spec.rb @@ -6,6 +6,10 @@ RSpec.describe Formatter do let(:local_status) { Fabricate(:status, text: local_text, account: account) } let(:remote_status) { Fabricate(:status, text: ' Beep boop', uri: 'beepboop', account: account) } + let(:local_text_with_mention) { "@#{account.username} @#{account.username}@example.com #{local_text}?x=@#{account.username} #hashtag" } + let(:local_status_with_mention) { Fabricate(:status, text: local_text_with_mention, + account: account, mentions: [Fabricate(:mention, account: account)]) } + describe '#format' do subject { Formatter.instance.format(local_status) } @@ -21,6 +25,18 @@ RSpec.describe Formatter do expect(subject).to match('google.com/') end + it 'contains a mention' do + result = Formatter.instance.format(local_status_with_mention) + expect(result).to match "@#{account.username}" + expect(result).to match %r{href=\"http://google.com/\?x=@#{account.username}} + expect(result).not_to match "href=\"https://example.com/@#{account.username}" + end + + it 'contains a hashtag' do + result = Formatter.instance.format(local_status_with_mention) + expect(result).to match("/tags/hashtag\" class=\"mention hashtag\">#hashtag") + end + context 'matches a stand-alone medium URL' do let(:local_text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' } it 'has valid url' do diff --git a/spec/models/account_spec.rb b/spec/models/account_spec.rb index 157db633a0..efd87e8717 100644 --- a/spec/models/account_spec.rb +++ b/spec/models/account_spec.rb @@ -379,6 +379,10 @@ RSpec.describe Account, type: :model do it 'does not match URLs' do expect(subject.match('Check this out https://medium.com/@alice/some-article#.abcdef123')).to be_nil end + + xit 'does not match URL querystring' do + expect(subject.match('https://example.com/?x=@alice')).to be_nil + end end describe 'validations' do