Merge remote-tracking branch 'VyrCossont/search-all-visible-toots'

This commit is contained in:
Skye 2023-02-04 19:57:12 +09:00
commit 388300482c
Signed by: me
GPG key ID: 0104BC05F41B77B8
12 changed files with 160 additions and 14 deletions

View file

@ -238,7 +238,7 @@ SMTP_FROM_ADDRESS=notifications@example.com
# SAML_ATTRIBUTES_STATEMENTS_FULL_NAME="urn:oid:2.16.840.1.113730.3.1.241"
# SAML_ATTRIBUTES_STATEMENTS_FIRST_NAME="urn:oid:2.5.4.42"
# SAML_ATTRIBUTES_STATEMENTS_LAST_NAME="urn:oid:2.5.4.4"
# SAML_UID_ATTRIBUTE="urn:oid:0.9.2342.19200300.100.1.1"
# SAML_UID_ATTRIBUTE="urn:oid:0.9.2342.19200300.100.1.1"
# SAML_ATTRIBUTES_STATEMENTS_VERIFIED=
# SAML_ATTRIBUTES_STATEMENTS_VERIFIED_EMAIL=
@ -286,6 +286,12 @@ MAX_REACTIONS=1
# Customize the number of hashtags shown in 'Explore'
# MAX_TRENDING_TAGS=10
# Scope of full-text searches:
# - public: search any status with public visibility
# - public_or_unlisted: search any status with public or unlisted visibility
# - classic: search only a user's own statuses, favourites, bookmarks, and mentions (default when SEARCH_SCOPE is unset or unrecognised)
# SEARCH_SCOPE=public
# Maximum custom emoji file sizes
# If undefined or smaller than MAX_EMOJI_SIZE, the value
# of MAX_EMOJI_SIZE will be used for MAX_REMOTE_EMOJI_SIZE

View file

@ -65,6 +65,12 @@ class StatusesIndex < Chewy::Index
root date_detection: false do
field :id, type: 'long'
field :account_id, type: 'long'
field :created_at, type: 'date'
field :visibility, type: 'keyword'
# Index the author's domain; when the account has no domain (presumably a local
# account - confirm), fall back to the configured local domain.
field :domain, type: 'keyword', value: ->(status) { status.account.domain || Rails.configuration.x.local_domain }
field :lang, type: 'keyword', value: ->(status) { status.language }
field :is, type: 'keyword', value: ->(status) { status.searchable_is }
field :has, type: 'keyword', value: ->(status) { status.searchable_has }
field :text, type: 'text', value: ->(status) { status.searchable_text } do
field :stemmed, type: 'text', analyzer: 'content'

View file

@ -85,6 +85,8 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
attach_tags(@status)
end
StatusesIndex.import! @status if Chewy.enabled?
resolve_thread(@status)
fetch_replies(@status)
distribute

View file

@ -25,7 +25,9 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter
# on the results of the filter, so this filtering happens here instead
bulk.map! do |entry|
new_entry = begin
if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
if entry[:index] &&
entry.dig(:index, :data, 'searchable_by').blank? &&
Rails.configuration.x.search_scope == :classic
{ delete: entry[:index].except(:data) }
else
entry
@ -58,13 +60,21 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter
end
def scopes
[
classic_scopes = [
local_statuses_scope,
local_mentions_scope,
local_favourites_scope,
local_votes_scope,
local_bookmarks_scope,
]
case Rails.configuration.x.search_scope
when :public
classic_scopes + [public_scope]
when :public_or_unlisted
classic_scopes + [public_or_unlisted_scope]
else
classic_scopes
end
end
def local_mentions_scope
@ -86,4 +96,12 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter
# Relation over `Status.local`, selecting each row's id plus a `status_id`
# column that resolves to `reblog_of_id` when set and to the row's own id otherwise.
def local_statuses_scope
  columns = '"statuses"."id", COALESCE("statuses"."reblog_of_id", "statuses"."id") AS status_id'
  Status.local.select(columns)
end
# Relation over statuses with public visibility, selecting the row id both as
# `id` and as `status_id`.
def public_scope
  columns = '"statuses"."id", "statuses"."id" AS status_id'
  Status.with_public_visibility.select(columns)
end
# Relation over statuses with public or unlisted visibility, selecting the row
# id both as `id` and as `status_id`.
def public_or_unlisted_scope
  columns = '"statuses"."id", "statuses"."id" AS status_id'
  Status.with_public_or_unlisted_visibility.select(columns)
end
end

View file

@ -9,7 +9,7 @@ class SearchQueryParser < Parslet::Parser
rule(:prefix) { (term >> colon).as(:prefix) }
rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) }
rule(:phrase) { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) }
rule(:clause) { (prefix.maybe >> operator.maybe >> (phrase | term | shortcode)).as(:clause) }
rule(:clause) { (operator.maybe >> prefix.maybe >> (phrase | term | shortcode)).as(:clause) }
rule(:query) { (clause >> space.maybe).repeat.as(:query) }
root(:query)
end

View file

@ -2,7 +2,7 @@
class SearchQueryTransformer < Parslet::Transform
class Query
attr_reader :should_clauses, :must_not_clauses, :must_clauses, :filter_clauses
attr_reader :should_clauses, :must_not_clauses, :must_clauses, :filter_clauses, :order_clauses
def initialize(clauses)
grouped = clauses.chunk(&:operator).to_h
@ -10,13 +10,20 @@ class SearchQueryTransformer < Parslet::Transform
@must_not_clauses = grouped.fetch(:must_not, [])
@must_clauses = grouped.fetch(:must, [])
@filter_clauses = grouped.fetch(:filter, [])
@order_clauses = grouped.fetch(:order, [])
end
def apply(search)
def apply(search, account)
should_clauses.each { |clause| search = search.query.should(clause_to_query(clause)) }
must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) }
must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) }
filter_clauses.each { |clause| search = search.filter(**clause_to_filter(clause)) }
filter_clauses.each { |clause| search = search.filter(**clause_to_filter(clause, account)) }
if order_clauses.empty?
# Default to most recent results first.
search = search.order(created_at: :desc)
else
order_clauses.each { |clause| search = search.order(**clause_to_order(clause)) }
end
search.query.minimum_should_match(1)
end
@ -28,15 +35,26 @@ class SearchQueryTransformer < Parslet::Transform
{ multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } }
when PhraseClause
{ match_phrase: { text: { query: clause.phrase } } }
when PrefixClause
{ term: { clause.filter => clause.term } }
else
raise "Unexpected clause type: #{clause}"
end
end
def clause_to_filter(clause)
def clause_to_filter(clause, account)
case clause
when PrefixClause
{ term: { clause.filter => clause.term } }
{ clause.query => { clause.filter => clause.term == :account_id_placeholder ? account.id : clause.term } }
else
raise "Unexpected clause type: #{clause}"
end
end
def clause_to_order(clause)
case clause
when PrefixClause
{ clause.term => clause.order }
else
raise "Unexpected clause type: #{clause}"
end
@ -81,11 +99,35 @@ class SearchQueryTransformer < Parslet::Transform
end
class PrefixClause
attr_reader :filter, :operator, :term
attr_reader :filter, :operator, :term, :order, :query
def initialize(prefix, operator, term)
@query = :term
# Map the optional leading +/- onto the query section this clause belongs to:
# no operator or '+' makes it a filter, '-' makes it a must_not.
case operator
when '+', nil
  @operator = :filter
when '-'
  @operator = :must_not
else
  # Report the offending value via the `operator` argument; no `str` exists in
  # this scope, so interpolating it would raise NameError instead.
  raise "Unknown operator: #{operator}"
end
def initialize(prefix, term)
@operator = :filter
case prefix
when 'domain', 'is', 'has', 'lang', 'visibility'
@filter = prefix.to_s
@term = term
when 'before', 'after'
@query = :range
@filter = 'created_at'
case prefix
when 'before'
@term = { lt: term }
when 'after'
@term = { gt: term }
else
raise Mastodon::SyntaxError
end
when 'from'
@filter = :account_id
@ -94,6 +136,25 @@ class SearchQueryTransformer < Parslet::Transform
account = Account.find_remote!(username, domain)
@term = account.id
when 'scope'
raise Mastodon::SyntaxError unless operator.nil?
raise Mastodon::SyntaxError unless term == 'classic'
@filter = 'searchable_by'
@term = :account_id_placeholder
when 'sort'
raise Mastodon::SyntaxError unless operator.nil?
@operator = :order
@term = :created_at
case term
when 'oldest'
@order = :asc
when 'newest'
@order = :desc
else
raise Mastodon::SyntaxError
end
else
raise Mastodon::SyntaxError
end
@ -105,7 +166,7 @@ class SearchQueryTransformer < Parslet::Transform
operator = clause[:operator]&.to_s
if clause[:prefix]
PrefixClause.new(prefix, clause[:term].to_s)
PrefixClause.new(prefix, operator, clause[:term].to_s)
elsif clause[:term]
TermClause.new(prefix, operator, clause[:term].to_s)
elsif clause[:shortcode]

View file

@ -104,6 +104,7 @@ class Status < ApplicationRecord
scope :without_replies, -> { where('statuses.reply = FALSE OR statuses.in_reply_to_account_id = statuses.account_id') }
scope :without_reblogs, -> { where('statuses.reblog_of_id IS NULL') }
scope :with_public_visibility, -> { where(visibility: :public) }
scope :with_public_or_unlisted_visibility, -> { where(visibility: [:public, :unlisted]) }
scope :tagged_with, ->(tag_ids) { joins(:statuses_tags).where(statuses_tags: { tag_id: tag_ids }) }
scope :excluding_silenced_accounts, -> { left_outer_joins(:account).where(accounts: { silenced_at: nil }) }
scope :including_silenced_accounts, -> { left_outer_joins(:account).where.not(accounts: { silenced_at: nil }) }
@ -188,6 +189,27 @@ class Status < ApplicationRecord
].compact.join("\n\n")
end
# Keywords indexed in the `is` search field for this status.
#
# Returns an Array of Symbols, in a fixed order, containing each of
# :bot, :local, :local_only, :reply and :sensitive whose matching
# predicate/attribute is truthy.
def searchable_is
  flags = {
    bot: account.bot?,
    local: local?,
    local_only: local_only,
    reply: reply?,
    sensitive: sensitive?,
  }

  flags.select { |_keyword, enabled| enabled }.keys
end
# Keywords indexed in the `has` search field for this status.
#
# Returns an Array of Symbols, in a fixed order, containing each of
# :cw, :link, :media, :mention, :poll and :tag whose check is truthy.
# Link detection is delegated to FetchLinkCardService#link?.
def searchable_has
  features = {
    cw: spoiler_text?,
    link: FetchLinkCardService.new.link?(self),
    media: media_attachments.present?,
    mention: mentions.present?,
    poll: preloadable_poll.present?,
    tag: tags.present?,
  }

  features.select { |_keyword, found| found }.keys
end
# Returns the `acct` of this status's account. Judging by the method name, this
# is the human-readable identifier shown in logs - confirm against callers.
def to_log_human_identifier
account.acct
end

View file

@ -90,6 +90,8 @@ class ActivityPub::ProcessAccountService < BaseService
set_fetchable_attributes! unless @options[:only_key] || @account.suspended?
@account.save_with_optional_media!
AccountsIndex.import! @account if Chewy.enabled?
end
def set_immediate_protocol_attributes!

View file

@ -54,6 +54,8 @@ class ActivityPub::ProcessStatusUpdateService < BaseService
broadcast_updates!
end
StatusesIndex.import! @status if Chewy.enabled?
forward_activity! if significant_changes? && @status_parser.edited_at > last_edit_date
end

View file

@ -34,6 +34,14 @@ class FetchLinkCardService < BaseService
nil
end
##
# Reports whether the given status has at least one link, reusing this
# service's URL parsing.
#
# Stores the status in @status and the result of `parse_urls` in
# @original_url as a side effect; returns true unless that result is nil.
def link?(status)
  @status = status
  @original_url = parse_urls
  return false if @original_url.nil?

  true
end
private
def process_url

View file

@ -35,7 +35,14 @@ class SearchService < BaseService
end
def perform_statuses_search!
definition = parsed_query.apply(StatusesIndex.filter(term: { searchable_by: @account.id }))
statuses_index = StatusesIndex.filter(term: { searchable_by: @account.id })
case Rails.configuration.x.search_scope
when :public
statuses_index = statuses_index.filter.or(term: { visibility: 'public' })
when :public_or_unlisted
statuses_index = statuses_index.filter.or(terms: { visibility: ['public', 'unlisted'] })
end
definition = parsed_query.apply(statuses_index, @account)
if @options[:account_id].present?
definition = definition.filter(term: { account_id: @options[:account_id] })

View file

@ -0,0 +1,12 @@
# frozen_string_literal: true
# Choose the full-text search scope from the SEARCH_SCOPE environment variable.
# Only 'public' and 'public_or_unlisted' are recognised; any other value,
# including an unset variable, selects :classic.
Rails.application.configure do
  config.x.search_scope =
    case ENV['SEARCH_SCOPE']
    when 'public'
      :public
    when 'public_or_unlisted'
      :public_or_unlisted
    else
      :classic
    end
end