Don't add \b to whole-word keywords that don't start with word characters.
Ditto for ending with \b. Consider muting the phrase "(hot take)". I stipulate it is reasonable to enter this with the default "match whole word" behavior. Under the old behavior, this would be encoded as \b\(hot\ take\)\b However, if \b is before the first character in the string and the first character in the string is not a word character, then the match will fail. Ditto for after. In our example, "(" is not a word character, so this will not match statuses containing "(hot take)", and that's a very surprising behavior. To address this, we only add leading and trailing \b to keywords that start or end with word characters.
This commit is contained in:
parent
8d69329b8e
commit
3b2bf30644
2 changed files with 34 additions and 14 deletions
|
@ -19,35 +19,49 @@ class Glitch::KeywordMute < ApplicationRecord
|
|||
after_commit :invalidate_cached_matcher
|
||||
|
||||
def self.matcher_for(account_id)
|
||||
Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) }
|
||||
Matcher.new(account_id)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def invalidate_cached_matcher
|
||||
Rails.cache.delete("keyword_mutes:matcher:#{account_id}")
|
||||
Rails.cache.delete("keyword_mutes:regex:#{account_id}")
|
||||
end
|
||||
|
||||
class Matcher
|
||||
attr_reader :account_id
|
||||
attr_reader :regex
|
||||
|
||||
def initialize(account_id)
|
||||
re = [].tap do |arr|
|
||||
Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m|
|
||||
boundary = m.whole_word ? '\b' : ''
|
||||
arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}"
|
||||
@account_id = account_id
|
||||
@regex = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_for_account }
|
||||
end
|
||||
|
||||
def keywords
|
||||
Glitch::KeywordMute.
|
||||
where(account_id: account_id).
|
||||
select(:keyword, :id, :whole_word)
|
||||
end
|
||||
|
||||
def regex_for_account
|
||||
re_text = [].tap do |arr|
|
||||
keywords.find_each do |kw|
|
||||
arr << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : Regexp.escape(kw.keyword))
|
||||
end
|
||||
end.join('|')
|
||||
|
||||
@regex = /#{re}/i unless re.empty?
|
||||
/#{re_text}/i unless re_text.empty?
|
||||
end
|
||||
|
||||
def boundary_regex_for_keyword(keyword)
|
||||
sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
|
||||
eb = keyword =~ /[[:word:]]\Z/ ? '\b' : ''
|
||||
|
||||
"#{sb}#{Regexp.escape(keyword)}#{eb}"
|
||||
end
|
||||
|
||||
def =~(str)
|
||||
regex ? regex =~ str : false
|
||||
end
|
||||
|
||||
def matches?(str)
|
||||
!!(regex =~ str)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -7,7 +7,7 @@ RSpec.describe Glitch::KeywordMute, type: :model do
|
|||
describe '.matcher_for' do
|
||||
let(:matcher) { Glitch::KeywordMute.matcher_for(alice) }
|
||||
|
||||
describe 'with no Glitch::KeywordMutes for an account' do
|
||||
describe 'with no mutes' do
|
||||
before do
|
||||
Glitch::KeywordMute.delete_all
|
||||
end
|
||||
|
@ -17,7 +17,7 @@ RSpec.describe Glitch::KeywordMute, type: :model do
|
|||
end
|
||||
end
|
||||
|
||||
describe 'with Glitch::KeywordMutes for an account' do
|
||||
describe 'with mutes' do
|
||||
it 'does not match keywords set by a different account' do
|
||||
Glitch::KeywordMute.create!(account: bob, keyword: 'take')
|
||||
|
||||
|
@ -63,7 +63,13 @@ RSpec.describe Glitch::KeywordMute, type: :model do
|
|||
it 'matches keywords surrounded by non-alphanumeric ornamentation' do
|
||||
Glitch::KeywordMute.create!(account: alice, keyword: 'hot')
|
||||
|
||||
expect(matcher =~ 'This is a ~*HOT*~ take').to be_truthy
|
||||
expect(matcher =~ '(hot take)').to be_truthy
|
||||
end
|
||||
|
||||
it 'escapes metacharacters in keywords' do
|
||||
Glitch::KeywordMute.create!(account: alice, keyword: '(hot take)')
|
||||
|
||||
expect(matcher =~ '(hot take)').to be_truthy
|
||||
end
|
||||
|
||||
it 'uses case-folding rules appropriate for more than just English' do
|
||||
|
|
Loading…
Reference in a new issue