Fix #hashtag matching non-hashtagged posts in search (#26781)

This commit is contained in:
Eugen Rochko 2023-09-04 10:18:45 +02:00 committed by GitHub
parent cfd21eee96
commit 2c3f3e0390
4 changed files with 44 additions and 12 deletions

View file

@ -37,18 +37,29 @@ class PublicStatusesIndex < Chewy::Index
english_stemmer english_stemmer
), ),
}, },
hashtag: {
tokenizer: 'keyword',
filter: %w(
word_delimiter_graph
lowercase
asciifolding
cjk_width
),
},
}, },
} }
index_scope ::Status.unscoped index_scope ::Status.unscoped
.kept .kept
.indexable .indexable
.includes(:media_attachments, :preloadable_poll, :preview_cards) .includes(:media_attachments, :preloadable_poll, :preview_cards, :tags)
root date_detection: false do root date_detection: false do
field(:id, type: 'long') field(:id, type: 'long')
field(:account_id, type: 'long') field(:account_id, type: 'long')
field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') }
field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
field(:language, type: 'keyword') field(:language, type: 'keyword')
field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties }) field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties })
field(:created_at, type: 'date') field(:created_at, type: 'date')

View file

@ -37,15 +37,26 @@ class StatusesIndex < Chewy::Index
english_stemmer english_stemmer
), ),
}, },
hashtag: {
tokenizer: 'keyword',
filter: %w(
word_delimiter_graph
lowercase
asciifolding
cjk_width
),
},
}, },
} }
index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preview_cards, :local_mentioned, :local_favorited, :local_reblogged, :local_bookmarked, preloadable_poll: :local_voters), delete_if: ->(status) { status.searchable_by.empty? } index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preview_cards, :local_mentioned, :local_favorited, :local_reblogged, :local_bookmarked, :tags, preloadable_poll: :local_voters), delete_if: ->(status) { status.searchable_by.empty? }
root date_detection: false do root date_detection: false do
field(:id, type: 'long') field(:id, type: 'long')
field(:account_id, type: 'long') field(:account_id, type: 'long')
field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') }
field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
field(:searchable_by, type: 'long', value: ->(status) { status.searchable_by }) field(:searchable_by, type: 'long', value: ->(status) { status.searchable_by })
field(:language, type: 'keyword') field(:language, type: 'keyword')
field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties }) field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties })

View file

@ -5,12 +5,21 @@ class TagsIndex < Chewy::Index
analyzer: { analyzer: {
content: { content: {
tokenizer: 'keyword', tokenizer: 'keyword',
filter: %w(lowercase asciifolding cjk_width), filter: %w(
word_delimiter_graph
lowercase
asciifolding
cjk_width
),
}, },
edge_ngram: { edge_ngram: {
tokenizer: 'edge_ngram', tokenizer: 'edge_ngram',
filter: %w(lowercase asciifolding cjk_width), filter: %w(
lowercase
asciifolding
cjk_width
),
}, },
}, },
@ -30,12 +39,9 @@ class TagsIndex < Chewy::Index
end end
root date_detection: false do root date_detection: false do
field :name, type: 'text', analyzer: 'content' do field(:name, type: 'text', analyzer: 'content', value: :display_name) { field(:edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content') }
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' field(:reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? })
end field(:usage, type: 'long', value: ->(tag, crutches) { tag.history.aggregate(crutches.time_period).accounts })
field(:last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at })
field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
field :usage, type: 'long', value: ->(tag, crutches) { tag.history.aggregate(crutches.time_period).accounts }
field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
end end
end end

View file

@ -53,9 +53,13 @@ class SearchQueryTransformer < Parslet::Transform
end end
def to_query def to_query
if @term.start_with?('#')
{ match: { tags: { query: @term } } }
else
{ multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } } { multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } }
end end
end end
end
class PhraseClause class PhraseClause
attr_reader :operator, :phrase attr_reader :operator, :phrase