keyword mute: Store keywords as a list
This has a couple of advantages over the regex approach:

- Keywords are individually addressable, which makes it easier to gather statistics (#363)
- Keywords can be individually applied to different feeds, e.g. skipping mentions (#454)

It *does* end up creating many more Regexp objects. I'm not yet sure if the difference is significant.
This commit is contained in:
parent
5ec2d25ef6
commit
d55d5ccc9d
1 changed files with 42 additions and 36 deletions
|
@ -33,68 +33,74 @@ class Glitch::KeywordMute < ApplicationRecord
|
||||||
Rails.cache.delete(TagMatcher.cache_key(account_id))
|
Rails.cache.delete(TagMatcher.cache_key(account_id))
|
||||||
end
|
end
|
||||||
|
|
||||||
class RegexpMatcher
|
class CachedKeywordMute
|
||||||
attr_reader :account_id
|
attr_reader :keyword
|
||||||
attr_reader :regex
|
attr_reader :whole_word
|
||||||
|
|
||||||
def initialize(account_id)
|
def initialize(keyword, whole_word)
|
||||||
@account_id = account_id
|
@keyword = keyword
|
||||||
regex_text = Rails.cache.fetch(self.class.cache_key(account_id)) { make_regex_text }
|
@whole_word = whole_word
|
||||||
@regex = /#{regex_text}/
|
|
||||||
end
|
end
|
||||||
|
|
||||||
protected
|
def boundary_regex_for_keyword
|
||||||
|
|
||||||
def keywords
|
|
||||||
Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword)
|
|
||||||
end
|
|
||||||
|
|
||||||
def boundary_regex_for_keyword(keyword)
|
|
||||||
sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
|
sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
|
||||||
eb = keyword =~ /[[:word:]]\Z/ ? '\b' : ''
|
eb = keyword =~ /[[:word:]]\Z/ ? '\b' : ''
|
||||||
|
|
||||||
/(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/
|
/(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Tests whether this muted keyword occurs in +str+.
#
# When +whole_word+ is set, the pattern from +boundary_regex_for_keyword+ is
# used. Otherwise the keyword is matched case-insensitively as a literal
# substring.
#
# @param str [String] text to search
# @return [Integer, nil] index of the first match, or nil when there is none
def matches?(str)
  # The keyword is user-supplied text, not a pattern: escape it so characters
  # such as ".", "+" or "(" match literally and cannot raise RegexpError.
  pattern = whole_word ? boundary_regex_for_keyword : /#{Regexp.escape(keyword)}/i
  str =~ pattern
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class TextMatcher < RegexpMatcher
|
# Base class for per-account keyword matchers.
#
# On construction it loads the account's keyword list through Rails.cache,
# using the cache key supplied by the concrete subclass (self.class.cache_key),
# and falls back to querying Glitch::KeywordMute when the cache has no entry.
class Matcher
  attr_reader :account_id, :words

  # @param account_id the account whose keyword mutes should be loaded
  def initialize(account_id)
    @account_id = account_id
    @words = Rails.cache.fetch(self.class.cache_key(account_id)) { fetch_keywords }
  end

  protected

  # Builds one CachedKeywordMute per stored keyword for this account, passing
  # each keyword through transform_keyword first.
  def fetch_keywords
    rows = Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword)
    rows.map { |whole_word, keyword| CachedKeywordMute.new(transform_keyword(keyword), whole_word) }
  end

  # Hook for subclasses to normalise a keyword before it is cached.
  # The default returns the keyword unchanged.
  def transform_keyword(keyword)
    keyword
  end
end
|
||||||
|
|
||||||
|
class TextMatcher < Matcher
|
||||||
def self.cache_key(account_id)
|
def self.cache_key(account_id)
|
||||||
format('keyword_mutes:regex:text:%s', account_id)
|
format('keyword_mutes:regex:text:%s', account_id)
|
||||||
end
|
end
|
||||||
|
|
||||||
def matches?(str)
|
def matches?(str)
|
||||||
!!(regex =~ str)
|
words.any? { |w| w.matches?(str) }
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def make_regex_text
|
|
||||||
kws = keywords.map! do |whole_word, keyword|
|
|
||||||
whole_word ? boundary_regex_for_keyword(keyword) : /(?i:#{Regexp.escape(keyword)})/
|
|
||||||
end
|
|
||||||
|
|
||||||
Regexp.union(kws).source
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class TagMatcher < RegexpMatcher
|
class TagMatcher < Matcher
|
||||||
def self.cache_key(account_id)
|
def self.cache_key(account_id)
|
||||||
format('keyword_mutes:regex:tag:%s', account_id)
|
format('keyword_mutes:regex:tag:%s', account_id)
|
||||||
end
|
end
|
||||||
|
|
||||||
def matches?(tags)
|
def matches?(tags)
|
||||||
tags.pluck(:name).any? { |n| regex =~ n }
|
tags.pluck(:name).any? do |n|
|
||||||
|
words.any? { |w| w.matches?(n) }
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
protected
|
||||||
|
|
||||||
def make_regex_text
|
def transform_keyword(kw)
|
||||||
kws = keywords.map! do |whole_word, keyword|
|
Tag::HASHTAG_RE =~ kw ? $1 : kw
|
||||||
term = (Tag::HASHTAG_RE =~ keyword) ? $1 : keyword
|
|
||||||
whole_word ? boundary_regex_for_keyword(term) : term
|
|
||||||
end
|
|
||||||
|
|
||||||
Regexp.union(kws).source
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue