From 815462fefff04fde5a2f7114ff90a2bcaf9010e3 Mon Sep 17 00:00:00 2001
From: abcang
Date: Wed, 19 Apr 2017 21:52:18 +0900
Subject: [PATCH] Fix html escape characters in the URL (#2138)

* fix character escaping in URL

* add tests

* put a comma after the last item

* add HTML escape test
---
Reviewer note (ignored by `git am`): this copy was rebuilt from a text
extraction that had collapsed all line breaks and stripped HTML-looking
literals. Hunk line counts and the diffstat were used to restore the line
structure. The inputs and expectations of the two "contains html" specs were
restored from the decoded text left in the damaged copy; the `&apos;`
spelling presumably matches the HTMLEntities default flavor - TODO confirm
against the upstream commit. Indentation and in-line column alignment are
inferred and should also be verified before applying.

 app/lib/formatter.rb       | 26 +++++++++++--
 spec/lib/formatter_spec.rb | 77 ++++++++++++++++++++++++++++++--------
 2 files changed, 84 insertions(+), 19 deletions(-)

diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
index a44e5ed3ed..43893915d4 100644
--- a/app/lib/formatter.rb
+++ b/app/lib/formatter.rb
@@ -13,10 +13,9 @@ class Formatter
     return reformat(status.content) unless status.local?
 
     html = status.text
-    html = encode(html)
+    html = encode_and_link_urls(html)
     html = simple_format(html, {}, sanitize: false)
     html = html.delete("\n")
-    html = link_urls(html)
     html = link_mentions(html, status.mentions)
     html = link_hashtags(html)
 
@@ -35,8 +34,7 @@ class Formatter
   def simplified_format(account)
     return reformat(account.note) unless account.local?
 
-    html = encode(account.note)
-    html = link_urls(html)
+    html = encode_and_link_urls(account.note)
     html = link_accounts(html)
     html = link_hashtags(html)
 
@@ -49,6 +47,26 @@ class Formatter
     HTMLEntities.new.encode(html)
   end
 
+  def encode_and_link_urls(html)
+    entities = Twitter::Extractor.extract_urls_with_indices(html, extract_url_without_protocol: false)
+    entities = entities.sort_by { |entity| entity[:indices].first }
+
+    chars = html.to_s.to_char_a
+    html_attrs = {
+      target: '_blank',
+      rel: 'nofollow noopener',
+    }
+    result = ''
+
+    last_index = entities.reduce(0) do |index, entity|
+      indices = entity[:indices]
+      result += encode(chars[index...indices.first].join)
+      result += Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), entity[:url], html_attrs)
+      indices.last
+    end
+    result += encode(chars[last_index..-1].join)
+  end
+
   def link_urls(html)
     Twitter::Autolink.auto_link_urls(html, url_target: '_blank',
                                            link_attribute_block: lambda { |_, a| a[:rel] << ' noopener' },
diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb
index 4b003b8e50..b70231d265 100644
--- a/spec/lib/formatter_spec.rb
+++ b/spec/lib/formatter_spec.rb
@@ -2,7 +2,8 @@ require 'rails_helper'
 
 RSpec.describe Formatter do
   let(:account) { Fabricate(:account, username: 'alice') }
-  let(:local_status) { Fabricate(:status, text: 'Hello world http://google.com', account: account) }
+  let(:local_text) { 'Hello world http://google.com' }
+  let(:local_status) { Fabricate(:status, text: local_text, account: account) }
   let(:remote_status) { Fabricate(:status, text: ' Beep boop', uri: 'beepboop', account: account) }
 
   describe '#format' do
@@ -20,35 +21,81 @@ RSpec.describe Formatter do
       expect(subject).to match('google.com')
     end
 
-=begin
-    it 'matches a stand-alone medium URL' do
-      expect(subject.match('https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4')[0]).to eq 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4'
+    context 'matches a stand-alone medium URL' do
+      let(:local_text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' }
+      it 'has valid url' do
+        expect(subject).to include('href="https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4"')
+      end
     end
 
-    it 'matches a stand-alone google URL' do
-      expect(subject.match('http://google.com')[0]).to eq 'http://google.com'
+    context 'matches a stand-alone google URL' do
+      let(:local_text) { 'http://google.com' }
+      it 'has valid url' do
+        expect(subject).to include('href="http://google.com"')
+      end
     end
 
-    it 'matches a URL without trailing period' do
-      expect(subject.match('http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ')[0]).to eq 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona'
+    context 'matches a URL without trailing period' do
+      let(:local_text) { 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ' }
+      it 'has valid url' do
+        expect(subject).to include('href="http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona"')
+      end
     end
 
+=begin
     it 'matches a URL without closing paranthesis' do
       expect(subject.match('(http://google.com/)')[0]).to eq 'http://google.com'
     end
+=end
+
+    context 'matches a URL without exclamation point' do
+      let(:local_text) { 'http://www.google.com!' }
+      it 'has valid url' do
+        expect(subject).to include('href="http://www.google.com"')
+      end
+    end
 
-    it 'matches a URL without exclamation point' do
-      expect(subject.match('http://www.google.com! ')[0]).to eq 'http://www.google.com'
+    context 'matches a URL without single quote' do
+      let(:local_text) { "http://www.google.com'" }
+      it 'has valid url' do
+        expect(subject).to include('href="http://www.google.com"')
+      end
     end
 
-    it 'matches a URL with a query string' do
-      expect(subject.match('https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink')[0]).to eq 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink'
+    context 'matches a URL without angle brackets' do
+      let(:local_text) { 'http://www.google.com>' }
+      it 'has valid url' do
+        expect(subject).to include('href="http://www.google.com"')
+      end
     end
 
-    it 'matches a URL with parenthesis in it' do
-      expect(subject.match('https://en.wikipedia.org/wiki/Diaspora_(software)')[0]).to eq 'https://en.wikipedia.org/wiki/Diaspora_(software)'
+    context 'matches a URL with a query string' do
+      let(:local_text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' }
+      it 'has valid url' do
+        expect(subject).to include('href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink"')
+      end
+    end
+
+    context 'matches a URL with parenthesis in it' do
+      let(:local_text) { 'https://en.wikipedia.org/wiki/Diaspora_(software)' }
+      it 'has valid url' do
+        expect(subject).to include('href="https://en.wikipedia.org/wiki/Diaspora_(software)"')
+      end
+    end
+
+    context 'contains html (script tag)' do
+      let(:local_text) { '<script>alert("Hello")</script>' }
+      it 'has valid url' do
+        expect(subject).to match '<p>&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;</p>'
+      end
+    end
+
+    context 'contains html (xss attack)' do
+      let(:local_text) { %q{<img src="javascript:alert('XSS');">} }
+      it 'has valid url' do
+        expect(subject).to match '<p>&lt;img src=&quot;javascript:alert(&apos;XSS&apos;);&quot;&gt;</p>'
+      end
     end
-=end
   end
 
   describe '#reformat' do