Fix URL scanning in note length validator and preview card fetching (#15827)

* Add tests

* Fix URL scanning in note length validator and preview card fetching
This commit is contained in:
Claire 2021-03-04 00:12:26 +01:00 committed by GitHub
parent a33f8f787a
commit 53d99e7426
6 changed files with 52 additions and 16 deletions

View file

@ -69,7 +69,7 @@ class LanguageDetector
def simplify_text(text) def simplify_text(text)
new_text = remove_html(text) new_text = remove_html(text)
new_text.gsub!(FetchLinkCardService::URL_PATTERN, '') new_text.gsub!(FetchLinkCardService::URL_PATTERN, '\1')
new_text.gsub!(Account::MENTION_RE, '') new_text.gsub!(Account::MENTION_RE, '')
new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase } new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase }
new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '') new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '')

View file

@ -2,12 +2,13 @@
class FetchLinkCardService < BaseService class FetchLinkCardService < BaseService
URL_PATTERN = %r{ URL_PATTERN = %r{
( # $1 URL (#{Twitter::TwitterText::Regex[:valid_url_preceding_chars]}) # $1 preceeding chars
(https?:\/\/) # $2 Protocol (required) ( # $2 URL
(#{Twitter::TwitterText::Regex[:valid_domain]}) # $3 Domain(s) (https?:\/\/) # $3 Protocol (required)
(?::(#{Twitter::TwitterText::Regex[:valid_port_number]}))? # $4 Port number (optional) (#{Twitter::TwitterText::Regex[:valid_domain]}) # $4 Domain(s)
(/#{Twitter::TwitterText::Regex[:valid_url_path]}*)? # $5 URL Path and anchor (?::(#{Twitter::TwitterText::Regex[:valid_port_number]}))? # $5 Port number (optional)
(\?#{Twitter::TwitterText::Regex[:valid_url_query_chars]}*#{Twitter::TwitterText::Regex[:valid_url_query_ending_chars]})? # $6 Query String (/#{Twitter::TwitterText::Regex[:valid_url_path]}*)? # $6 URL Path and anchor
(\?#{Twitter::TwitterText::Regex[:valid_url_query_chars]}*#{Twitter::TwitterText::Regex[:valid_url_query_ending_chars]})? # $7 Query String
) )
}iox }iox
@ -63,7 +64,7 @@ class FetchLinkCardService < BaseService
def parse_urls def parse_urls
if @status.local? if @status.local?
urls = @status.text.scan(URL_PATTERN).map { |array| Addressable::URI.parse(array[0]).normalize } urls = @status.text.scan(URL_PATTERN).map { |array| Addressable::URI.parse(array[1]).normalize }
else else
html = Nokogiri::HTML(@status.text) html = Nokogiri::HTML(@status.text)
links = html.css('a') links = html.css('a')

View file

@ -15,7 +15,7 @@ class NoteLengthValidator < ActiveModel::EachValidator
return '' if value.nil? return '' if value.nil?
value.dup.tap do |new_text| value.dup.tap do |new_text|
new_text.gsub!(FetchLinkCardService::URL_PATTERN, 'x' * 23) new_text.gsub!(FetchLinkCardService::URL_PATTERN, StatusLengthValidator::URL_PLACEHOLDER)
new_text.gsub!(Account::MENTION_RE, '@\2') new_text.gsub!(Account::MENTION_RE, '@\2')
end end
end end

View file

@ -2,12 +2,6 @@
class StatusLengthValidator < ActiveModel::Validator class StatusLengthValidator < ActiveModel::Validator
MAX_CHARS = 500 MAX_CHARS = 500
URL_PATTERN = %r{
(?:
(#{Twitter::TwitterText::Regex[:valid_url_preceding_chars]})
(#{FetchLinkCardService::URL_PATTERN})
)
}iox
URL_PLACEHOLDER = "\1#{'x' * 23}" URL_PLACEHOLDER = "\1#{'x' * 23}"
def validate(status) def validate(status)
@ -35,7 +29,7 @@ class StatusLengthValidator < ActiveModel::Validator
return '' if @status.text.nil? return '' if @status.text.nil?
@status.text.dup.tap do |new_text| @status.text.dup.tap do |new_text|
new_text.gsub!(URL_PATTERN, URL_PLACEHOLDER) new_text.gsub!(FetchLinkCardService::URL_PATTERN, URL_PLACEHOLDER)
new_text.gsub!(Account::MENTION_RE, '@\2') new_text.gsub!(Account::MENTION_RE, '@\2')
end end
end end

View file

@ -77,6 +77,14 @@ RSpec.describe FetchLinkCardService, type: :service do
expect(a_request(:get, 'http://example.com/test-')).to have_been_made.at_least_once expect(a_request(:get, 'http://example.com/test-')).to have_been_made.at_least_once
end end
end end
context do
let(:status) { Fabricate(:status, text: 'testhttp://example.com/sjis') }
it 'does not fetch URLs with not isolated from their surroundings' do
expect(a_request(:get, 'http://example.com/sjis')).to_not have_been_made
end
end
end end
context 'in a remote status' do context 'in a remote status' do

View file

@ -0,0 +1,33 @@
# frozen_string_literal: true
require 'rails_helper'
describe NoteLengthValidator do
subject { NoteLengthValidator.new(attributes: { note: true }, maximum: 500) }
describe '#validate' do
it 'adds an error when text is over 500 characters' do
text = 'a' * 520
account = double(note: text, errors: double(add: nil))
subject.validate_each(account, 'note', text)
expect(account.errors).to have_received(:add)
end
it 'counts URLs as 23 characters flat' do
text = ('a' * 476) + " http://#{'b' * 30}.com/example"
account = double(note: text, errors: double(add: nil))
subject.validate_each(account, 'note', text)
expect(account.errors).to_not have_received(:add)
end
it 'does not count non-autolinkable URLs as 23 characters flat' do
text = ('a' * 476) + "http://#{'b' * 30}.com/example"
account = double(note: text, errors: double(add: nil))
subject.validate_each(account, 'note', text)
expect(account.errors).to have_received(:add)
end
end
end