diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 65cbb6fcd0..d119f7cacf 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -57,7 +57,7 @@ class StatusesIndex < Chewy::Index field :id, type: 'long' field :account_id, type: 'long' - field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.ordered_media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do + field :text, type: 'text', value: ->(status) { [status.spoiler_text, PlainTextFormatter.new(status.text, status.local?).to_s].concat(status.ordered_media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do field :stemmed, type: 'text', analyzer: 'content' end diff --git a/app/controllers/api/web/embeds_controller.rb b/app/controllers/api/web/embeds_controller.rb index 741ba910fb..58f6345e68 100644 --- a/app/controllers/api/web/embeds_controller.rb +++ b/app/controllers/api/web/embeds_controller.rb @@ -15,7 +15,7 @@ class Api::Web::EmbedsController < Api::Web::BaseController return not_found if oembed.nil? begin - oembed[:html] = Formatter.instance.sanitize(oembed[:html], Sanitize::Config::MASTODON_OEMBED) + oembed[:html] = Sanitize.fragment(oembed[:html], Sanitize::Config::MASTODON_OEMBED) rescue ArgumentError return not_found end diff --git a/app/helpers/accounts_helper.rb b/app/helpers/accounts_helper.rb index a339617247..557f60f26f 100644 --- a/app/helpers/accounts_helper.rb +++ b/app/helpers/accounts_helper.rb @@ -2,10 +2,12 @@ module AccountsHelper def display_name(account, **options) + str = account.display_name.presence || account.username + if options[:custom_emojify] - Formatter.instance.format_display_name(account, **options) + prerender_custom_emojis(h(str), account.emojis) else - account.display_name.presence || account.username + str end end diff --git a/app/helpers/admin/trends/statuses_helper.rb b/app/helpers/admin/trends/statuses_helper.rb index d16e3dd121..214c1e2a68 100644 --- a/app/helpers/admin/trends/statuses_helper.rb +++ b/app/helpers/admin/trends/statuses_helper.rb @@ -12,9 +12,6 @@ module Admin::Trends::StatusesHelper return '' if text.blank? - html = Formatter.instance.send(:encode, text) - html = Formatter.instance.send(:encode_custom_emojis, html, status.emojis, prefers_autoplay?) - - html.html_safe # rubocop:disable Rails/OutputSafety + prerender_custom_emojis(h(text), status.emojis) end end diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb index e997570b59..651a98a854 100644 --- a/app/helpers/application_helper.rb +++ b/app/helpers/application_helper.rb @@ -239,4 +239,8 @@ module ApplicationHelper end end.values end + + def prerender_custom_emojis(html, custom_emojis) + EmojiFormatter.new(html, custom_emojis, animate: prefers_autoplay?).to_s + end end diff --git a/app/helpers/formatting_helper.rb b/app/helpers/formatting_helper.rb new file mode 100644 index 0000000000..66e9e1e91e --- /dev/null +++ b/app/helpers/formatting_helper.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module FormattingHelper + def html_aware_format(text, local, options = {}) + HtmlAwareFormatter.new(text, local, options).to_s + end + + def linkify(text, options = {}) + TextFormatter.new(text, options).to_s + end + + def extract_plain_text(text, local) + PlainTextFormatter.new(text, local).to_s + end + + def status_content_format(status) + html_aware_format(status.text, status.local?, preloaded_accounts: [status.account] + (status.respond_to?(:active_mentions) ? status.active_mentions.map(&:account) : [])) + end +end diff --git a/app/helpers/routing_helper.rb b/app/helpers/routing_helper.rb index fb24a1b28c..f95f46a560 100644 --- a/app/helpers/routing_helper.rb +++ b/app/helpers/routing_helper.rb @@ -2,6 +2,7 @@ module RoutingHelper extend ActiveSupport::Concern + include Rails.application.routes.url_helpers include ActionView::Helpers::AssetTagHelper include Webpacker::Helper @@ -22,8 +23,6 @@ module RoutingHelper full_asset_url(asset_pack_path(source, **options)) end - private - def use_storage? Rails.configuration.x.use_s3 || Rails.configuration.x.use_swift end diff --git a/app/helpers/statuses_helper.rb b/app/helpers/statuses_helper.rb index d328f89b7a..e92b4c8390 100644 --- a/app/helpers/statuses_helper.rb +++ b/app/helpers/statuses_helper.rb @@ -113,20 +113,6 @@ module StatusesHelper end end - private - - def simplified_text(text) - text.dup.tap do |new_text| - URI.extract(new_text).each do |url| - new_text.gsub!(url, '') - end - - new_text.gsub!(Account::MENTION_RE, '') - new_text.gsub!(Tag::HASHTAG_RE, '') - new_text.gsub!(/\s+/, '') - end - end - def embedded_view? params[:controller] == EMBEDDED_CONTROLLER && params[:action] == EMBEDDED_ACTION end diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index ea8d146d44..f4f98e29ce 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class ActivityPub::Activity::Create < ActivityPub::Activity + include FormattingHelper + def perform dereference_object! @@ -367,7 +369,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity end def converted_text - Formatter.instance.linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n")) + linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n")) end def unsupported_media_type?(mime_type) diff --git a/app/lib/emoji_formatter.rb b/app/lib/emoji_formatter.rb new file mode 100644 index 0000000000..f808f3a226 --- /dev/null +++ b/app/lib/emoji_formatter.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +class EmojiFormatter + include RoutingHelper + + DISALLOWED_BOUNDING_REGEX = /[[:alnum:]:]/.freeze + + attr_reader :html, :custom_emojis, :options + + # @param [ActiveSupport::SafeBuffer] html + # @param [Array] custom_emojis + # @param [Hash] options + # @option options [Boolean] :animate + def initialize(html, custom_emojis, options = {}) + raise ArgumentError unless html.html_safe? + + @html = html + @custom_emojis = custom_emojis + @options = options + end + + def to_s + return html if custom_emojis.empty? || html.blank? + + i = -1 + tag_open_index = nil + inside_shortname = false + shortname_start_index = -1 + invisible_depth = 0 + last_index = 0 + result = ''.dup + + while i + 1 < html.size + i += 1 + + if invisible_depth.zero? && inside_shortname && html[i] == ':' + inside_shortname = false + shortcode = html[shortname_start_index + 1..i - 1] + char_after = html[i + 1] + + next unless (char_after.nil? || !DISALLOWED_BOUNDING_REGEX.match?(char_after)) && (emoji = emoji_map[shortcode]) + + result << html[last_index..shortname_start_index - 1] if shortname_start_index.positive? + result << image_for_emoji(shortcode, emoji) + last_index = i + 1 + elsif tag_open_index && html[i] == '>' + tag = html[tag_open_index..i] + tag_open_index = nil + + if invisible_depth.positive? + invisible_depth += count_tag_nesting(tag) + elsif tag == '' - end - end - - context 'given a post containing unlinkable mentions' do - let(:status) { Fabricate(:status, text: '@alice', uri: nil) } - - it 'does not create a mention link' do - is_expected.to include '@alice' - end - end - - context do - subject do - status = Fabricate(:status, text: text, uri: nil) - Formatter.instance.format(status) - end - - include_examples 'encode and link URLs' - end - - context 'given a post with custom_emojify option' do - let!(:emoji) { Fabricate(:custom_emoji) } - let(:status) { Fabricate(:status, account: local_account, text: text) } - - subject { Formatter.instance.format(status, custom_emojify: true) } - - context 'given a post with an emoji shortcode at the start' do - let(:text) { ':coolcat: Beep boop' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/

:coolcat::coolcat: Beep boop
' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/

:coolcat:Beep :coolcat: boop

' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/Beep :coolcat::coolcat::coolcat:

' } - - it 'does not touch the shortcodes' do - is_expected.to match(/

:coolcat::coolcat:<\/p>/) - end - end - - context 'given a post with an emoji shortcode at the end' do - let(:text) { '

Beep boop
:coolcat:

' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/
:coolcat:alert("Hello")' } - - it 'strips the scripts' do - is_expected.to_not include '' - end - end - - context 'given a post containing malicious classes' do - let(:text) { 'Show more' } - - it 'strips the malicious classes' do - is_expected.to_not include 'status__content__spoiler-link' - end - end - end - - describe '#plaintext' do - subject { Formatter.instance.plaintext(status) } - - context 'given a post with local status' do - let(:status) { Fabricate(:status, text: '

a text by a nerd who uses an HTML tag in text

', uri: nil) } - - it 'returns the raw text' do - is_expected.to eq '

a text by a nerd who uses an HTML tag in text

' - end - end - - context 'given a post with remote status' do - let(:status) { Fabricate(:status, account: remote_account, text: '') } - - it 'returns tag-stripped text' do - is_expected.to eq '' - end - end - end - - describe '#simplified_format' do - subject { Formatter.instance.simplified_format(account) } - - context 'given a post with local status' do - let(:account) { Fabricate(:account, domain: nil, note: text) } - - context 'given a post containing linkable mentions for local accounts' do - let(:text) { '@alice' } - - before { local_account } - - it 'creates a mention link' do - is_expected.to eq '

@alice

' - end - end - - context 'given a post containing linkable mentions for remote accounts' do - let(:text) { '@bob@remote.test' } - - before { remote_account } - - it 'creates a mention link' do - is_expected.to eq '

@bob

' - end - end - - context 'given a post containing unlinkable mentions' do - let(:text) { '@alice' } - - it 'does not create a mention link' do - is_expected.to eq '

@alice

' - end - end - - context 'given a post with custom_emojify option' do - let!(:emoji) { Fabricate(:custom_emoji) } - - before { account.note = text } - subject { Formatter.instance.simplified_format(account, custom_emojify: true) } - - context 'given a post with an emoji shortcode at the start' do - let(:text) { ':coolcat: Beep boop' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/

:coolcat:alert("Hello")' } - let(:account) { Fabricate(:account, domain: 'remote', note: text) } - - it 'reformats' do - is_expected.to_not include '' - end - - context 'with custom_emojify option' do - let!(:emoji) { Fabricate(:custom_emoji, domain: remote_account.domain) } - - before { remote_account.note = text } - - subject { Formatter.instance.simplified_format(remote_account, custom_emojify: true) } - - context 'given a post with an emoji shortcode at the start' do - let(:text) { '

:coolcat: Beep boop
' } - - it 'converts shortcode to image tag' do - is_expected.to match(/

:coolcat:Beep :coolcat: boop

' } - - it 'converts shortcode to image tag' do - is_expected.to match(/Beep :coolcat::coolcat::coolcat:

' } - - it 'does not touch the shortcodes' do - is_expected.to match(/

:coolcat::coolcat:<\/p>/) - end - end - - context 'given a post with an emoji shortcode at the end' do - let(:text) { '

Beep boop
:coolcat:

' } - - it 'converts shortcode to image tag' do - is_expected.to match(/
:coolcat:alert("Hello")' } - - subject { Formatter.instance.sanitize(html, Sanitize::Config::MASTODON_STRICT) } - - it 'sanitizes' do - is_expected.to eq '' - end - end -end diff --git a/spec/lib/html_aware_formatter.rb b/spec/lib/html_aware_formatter.rb new file mode 100644 index 0000000000..18d23abf56 --- /dev/null +++ b/spec/lib/html_aware_formatter.rb @@ -0,0 +1,44 @@ +require 'rails_helper' + +RSpec.describe HtmlAwareFormatter do + describe '#to_s' do + subject { described_class.new(text, local).to_s } + + context 'when local' do + let(:local) { true } + let(:text) { 'Foo bar' } + + it 'returns formatted text' do + is_expected.to eq '

Foo bar

' + end + end + + context 'when remote' do + let(:local) { false } + + context 'given plain text' do + let(:text) { 'Beep boop' } + + it 'keeps the plain text' do + is_expected.to include 'Beep boop' + end + end + + context 'given text containing script tags' do + let(:text) { '' } + + it 'strips the scripts' do + is_expected.to_not include '' + end + end + + context 'given text containing malicious classes' do + let(:text) { 'Show more' } + + it 'strips the malicious classes' do + is_expected.to_not include 'status__content__spoiler-link' + end + end + end + end +end diff --git a/spec/lib/plain_text_formatter_spec.rb b/spec/lib/plain_text_formatter_spec.rb new file mode 100644 index 0000000000..c3d0ee6301 --- /dev/null +++ b/spec/lib/plain_text_formatter_spec.rb @@ -0,0 +1,24 @@ +require 'rails_helper' + +RSpec.describe PlainTextFormatter do + describe '#to_s' do + subject { described_class.new(status.text, status.local?).to_s } + + context 'given a post with local status' do + let(:status) { Fabricate(:status, text: '

a text by a nerd who uses an HTML tag in text

', uri: nil) } + + it 'returns the raw text' do + is_expected.to eq '

a text by a nerd who uses an HTML tag in text

' + end + end + + context 'given a post with remote status' do + let(:remote_account) { Fabricate(:account, domain: 'remote.test', username: 'bob', url: 'https://remote.test/') } + let(:status) { Fabricate(:status, account: remote_account, text: '

Hello

') } + + it 'returns tag-stripped text' do + is_expected.to eq 'Hello' + end + end + end +end diff --git a/spec/lib/text_formatter_spec.rb b/spec/lib/text_formatter_spec.rb new file mode 100644 index 0000000000..52a9d24988 --- /dev/null +++ b/spec/lib/text_formatter_spec.rb @@ -0,0 +1,313 @@ +require 'rails_helper' + +RSpec.describe TextFormatter do + describe '#to_s' do + let(:preloaded_accounts) { nil } + + subject { described_class.new(text, preloaded_accounts: preloaded_accounts).to_s } + + context 'given text containing plain text' do + let(:text) { 'text' } + + it 'paragraphizes the text' do + is_expected.to eq '

text

' + end + end + + context 'given text containing line feeds' do + let(:text) { "line\nfeed" } + + it 'removes line feeds' do + is_expected.not_to include "\n" + end + end + + context 'given text containing linkable mentions' do + let(:preloaded_accounts) { [Fabricate(:account, username: 'alice')] } + let(:text) { '@alice' } + + it 'creates a mention link' do + is_expected.to include '@alice' + end + end + + context 'given text containing unlinkable mentions' do + let(:preloaded_accounts) { [] } + let(:text) { '@alice' } + + it 'does not create a mention link' do + is_expected.to include '@alice' + end + end + + context 'given a stand-alone medium URL' do + let(:text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' } + + it 'matches the full URL' do + is_expected.to include 'href="https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4"' + end + end + + context 'given a stand-alone google URL' do + let(:text) { 'http://google.com' } + + it 'matches the full URL' do + is_expected.to include 'href="http://google.com"' + end + end + + context 'given a stand-alone URL with a newer TLD' do + let(:text) { 'http://example.gay' } + + it 'matches the full URL' do + is_expected.to include 'href="http://example.gay"' + end + end + + context 'given a stand-alone IDN URL' do + let(:text) { 'https://nic.みんな/' } + + it 'matches the full URL' do + is_expected.to include 'href="https://nic.みんな/"' + end + + it 'has display URL' do + is_expected.to include 'nic.みんな/' + end + end + + context 'given a URL with a trailing period' do + let(:text) { 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ' } + + it 'matches the full URL but not the period' do + is_expected.to include 'href="http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona"' + end + end + + context 'given a URL enclosed with parentheses' do + let(:text) { '(http://google.com/)' } + + it 'matches the full URL but not the parentheses' do + is_expected.to include 'href="http://google.com/"' + end + end + + context 'given a URL with a trailing exclamation point' do + let(:text) { 'http://www.google.com!' } + + it 'matches the full URL but not the exclamation point' do + is_expected.to include 'href="http://www.google.com"' + end + end + + context 'given a URL with a trailing single quote' do + let(:text) { "http://www.google.com'" } + + it 'matches the full URL but not the single quote' do + is_expected.to include 'href="http://www.google.com"' + end + end + + context 'given a URL with a trailing angle bracket' do + let(:text) { 'http://www.google.com>' } + + it 'matches the full URL but not the angle bracket' do + is_expected.to include 'href="http://www.google.com"' + end + end + + context 'given a URL with a query string' do + context 'with escaped unicode character' do + let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' } + + it 'matches the full URL' do + is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink"' + end + end + + context 'with unicode character' do + let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓&q=autolink' } + + it 'matches the full URL' do + is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓&q=autolink"' + end + end + + context 'with unicode character at the end' do + let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓' } + + it 'matches the full URL' do + is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓"' + end + end + + context 'with escaped and not escaped unicode characters' do + let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink' } + + it 'preserves escaped unicode characters' do + is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink"' + end + end + end + + context 'given a URL with parentheses in it' do + let(:text) { 'https://en.wikipedia.org/wiki/Diaspora_(software)' } + + it 'matches the full URL' do + is_expected.to include 'href="https://en.wikipedia.org/wiki/Diaspora_(software)"' + end + end + + context 'given a URL in quotation marks' do + let(:text) { '"https://example.com/"' } + + it 'does not match the quotation marks' do + is_expected.to include 'href="https://example.com/"' + end + end + + context 'given a URL in angle brackets' do + let(:text) { '' } + + it 'does not match the angle brackets' do + is_expected.to include 'href="https://example.com/"' + end + end + + context 'given a URL with Japanese path string' do + let(:text) { 'https://ja.wikipedia.org/wiki/日本' } + + it 'matches the full URL' do + is_expected.to include 'href="https://ja.wikipedia.org/wiki/日本"' + end + end + + context 'given a URL with Korean path string' do + let(:text) { 'https://ko.wikipedia.org/wiki/대한민국' } + + it 'matches the full URL' do + is_expected.to include 'href="https://ko.wikipedia.org/wiki/대한민국"' + end + end + + context 'given a URL with a full-width space' do + let(:text) { 'https://example.com/ abc123' } + + it 'does not match the full-width space' do + is_expected.to include 'href="https://example.com/"' + end + end + + context 'given a URL in Japanese quotation marks' do + let(:text) { '「[https://example.org/」' } + + it 'does not match the quotation marks' do + is_expected.to include 'href="https://example.org/"' + end + end + + context 'given a URL with Simplified Chinese path string' do + let(:text) { 'https://baike.baidu.com/item/中华人民共和国' } + + it 'matches the full URL' do + is_expected.to include 'href="https://baike.baidu.com/item/中华人民共和国"' + end + end + + context 'given a URL with Traditional Chinese path string' do + let(:text) { 'https://zh.wikipedia.org/wiki/臺灣' } + + it 'matches the full URL' do + is_expected.to include 'href="https://zh.wikipedia.org/wiki/臺灣"' + end + end + + context 'given a URL containing unsafe code (XSS attack, visible part)' do + let(:text) { %q{http://example.com/bb} } + + it 'does not include the HTML in the URL' do + is_expected.to include '"http://example.com/b"' + end + + it 'escapes the HTML' do + is_expected.to include '<del>b</del>' + end + end + + context 'given a URL containing unsafe code (XSS attack, invisible part)' do + let(:text) { %q{http://example.com/blahblahblahblah/a} } + + it 'does not include the HTML in the URL' do + is_expected.to include '"http://example.com/blahblahblahblah/a"' + end + + it 'escapes the HTML' do + is_expected.to include '<script>alert("Hello")</script>' + end + end + + context 'given text containing HTML code (script tag)' do + let(:text) { '' } + + it 'escapes the HTML' do + is_expected.to include '

<script>alert("Hello")</script>

' + end + end + + context 'given text containing HTML (XSS attack)' do + let(:text) { %q{} } + + it 'escapes the HTML' do + is_expected.to include '

<img src="javascript:alert('XSS');">

' + end + end + + context 'given an invalid URL' do + let(:text) { 'http://www\.google\.com' } + + it 'outputs the raw URL' do + is_expected.to eq '

http://www\.google\.com

' + end + end + + context 'given text containing a hashtag' do + let(:text) { '#hashtag' } + + it 'creates a hashtag link' do + is_expected.to include '/tags/hashtag" class="mention hashtag" rel="tag">#hashtag' + end + end + + context 'given text containing a hashtag with Unicode chars' do + let(:text) { '#hashtagタグ' } + + it 'creates a hashtag link' do + is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#hashtagタグ' + end + end + + context 'given text with a stand-alone xmpp: URI' do + let(:text) { 'xmpp:user@instance.com' } + + it 'matches the full URI' do + is_expected.to include 'href="xmpp:user@instance.com"' + end + end + + context 'given text with an xmpp: URI with a query-string' do + let(:text) { 'please join xmpp:muc@instance.com?join right now' } + + it 'matches the full URI' do + is_expected.to include 'href="xmpp:muc@instance.com?join"' + end + end + + context 'given text containing a magnet: URI' do + let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' } + + it 'matches the full URI' do + is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"' + end + end + end +end