From 60654e8111ffebbd27e56bda7586c29740b2b5ca Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 28 Nov 2022 10:43:26 -0800 Subject: [PATCH 01/20] Add SEARCH_ALL_VISIBLE_TOOTS env flag Context: https://docs.joinmastodon.org/user/network/#search Vanilla Mastodon intentionally refuses to search outside a user's own toots, favs, bookmarks, and mentions. This flag makes that restricted search behavior a per-instance choice, defaulting to the same behavior as vanilla Mastodon if the flag is absent. --- .env.production.sample | 5 +++++ app/chewy/statuses_index.rb | 1 + app/services/search_service.rb | 9 ++++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.env.production.sample b/.env.production.sample index da4c7fe4c8..f4afcd729e 100644 --- a/.env.production.sample +++ b/.env.production.sample @@ -283,6 +283,11 @@ MAX_POLL_OPTION_CHARS=100 # Customize the number of hashtags shown in 'Explore' # MAX_TRENDING_TAGS=10 +# Search all visible toots +# (Normally searches only a user's own toots, favs, bookmarks, and mentions) +# Only relevant when elasticsearch is installed +# SEARCH_ALL_VISIBLE_TOOTS=true + # Maximum custom emoji file sizes # If undefined or smaller than MAX_EMOJI_SIZE, the value # of MAX_EMOJI_SIZE will be used for MAX_REMOTE_EMOJI_SIZE diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 6dd4fb18b0..a80c6eca23 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -71,5 +71,6 @@ class StatusesIndex < Chewy::Index end field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) } + field :searchable_by_anyone, type: 'boolean', value: ->(status) { status.public_visibility? or status.unlisted_visibility? } end end diff --git a/app/services/search_service.rb b/app/services/search_service.rb index 1a76cbb388..158192bae1 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -1,6 +1,9 @@ # frozen_string_literal: true class SearchService < BaseService + + SEARCH_ALL_VISIBLE_TOOTS = ENV['SEARCH_ALL_VISIBLE_TOOTS'] == 'true' + def call(query, account, limit, options = {}) @query = query&.strip @account = account @@ -35,7 +38,11 @@ class SearchService < BaseService end def perform_statuses_search! - definition = parsed_query.apply(StatusesIndex.filter(term: { searchable_by: @account.id })) + statuses_index = StatusesIndex.filter(term: { searchable_by: @account.id }) + if SEARCH_ALL_VISIBLE_TOOTS + statuses_index = statuses_index.or.filter(term: { searchable_by_anyone: true }) + end + definition = parsed_query.apply(statuses_index) if @options[:account_id].present? definition = definition.filter(term: { account_id: @options[:account_id] }) From f6f13f54cfcae79ddfb9c94e2cf8fdb9e97c1026 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 28 Nov 2022 11:05:17 -0800 Subject: [PATCH 02/20] Add dependency for Chewy parallel reindexing --- Gemfile | 1 + Gemfile.lock | 1 + 2 files changed, 2 insertions(+) diff --git a/Gemfile b/Gemfile index 9843d1cee9..d43496a2f2 100644 --- a/Gemfile +++ b/Gemfile @@ -64,6 +64,7 @@ gem 'nokogiri', '~> 1.13' gem 'nsa', '~> 0.2' gem 'oj', '~> 3.13' gem 'ox', '~> 2.14' +gem 'parallel', '~> 1.22' gem 'parslet' gem 'posix-spawn' gem 'pundit', '~> 2.2' diff --git a/Gemfile.lock b/Gemfile.lock index 237edb61ce..99d29f342d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -804,6 +804,7 @@ DEPENDENCIES omniauth-rails_csrf_protection (~> 0.1) omniauth-saml (~> 1.10) ox (~> 2.14) + parallel (~> 1.22) parslet pg (~> 1.4) pghero (~> 2.8) From d3c1eaba2104c0aaa7e72479bef32c0a53c2335f Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 28 Nov 2022 18:45:20 -0800 Subject: [PATCH 03/20] Change TOOTS to STATUSES --- .env.production.sample | 6 +++--- app/services/search_service.rb | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.env.production.sample b/.env.production.sample index f4afcd729e..64b6e4979d 100644 --- a/.env.production.sample +++ b/.env.production.sample @@ -283,10 +283,10 @@ MAX_POLL_OPTION_CHARS=100 # Customize the number of hashtags shown in 'Explore' # MAX_TRENDING_TAGS=10 -# Search all visible toots -# (Normally searches only a user's own toots, favs, bookmarks, and mentions) +# Search all visible statuses +# (Normally searches only a user's own statuses, favs, bookmarks, and mentions) # Only relevant when elasticsearch is installed -# SEARCH_ALL_VISIBLE_TOOTS=true +# SEARCH_ALL_VISIBLE_STATUSES=true # Maximum custom emoji file sizes # If undefined or smaller than MAX_EMOJI_SIZE, the value diff --git a/app/services/search_service.rb b/app/services/search_service.rb index 158192bae1..f76ebceb61 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -2,7 +2,7 @@ class SearchService < BaseService - SEARCH_ALL_VISIBLE_TOOTS = ENV['SEARCH_ALL_VISIBLE_TOOTS'] == 'true' + SEARCH_ALL_VISIBLE_STATUSES = ENV['SEARCH_ALL_VISIBLE_STATUSES'] == 'true' def call(query, account, limit, options = {}) @query = query&.strip From b831c3897dc483559fe0d1d6445d0cd7024dbcf7 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 28 Nov 2022 20:06:19 -0800 Subject: [PATCH 04/20] Fix transposition of search DSL calls --- app/services/search_service.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/search_service.rb b/app/services/search_service.rb index f76ebceb61..7f69557f38 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -40,7 +40,7 @@ class SearchService < BaseService def perform_statuses_search! statuses_index = StatusesIndex.filter(term: { searchable_by: @account.id }) if SEARCH_ALL_VISIBLE_TOOTS - statuses_index = statuses_index.or.filter(term: { searchable_by_anyone: true }) + statuses_index = statuses_index.filter.or(term: { searchable_by_anyone: true }) end definition = parsed_query.apply(statuses_index) From a3faf835860bcfb8fc6439e96ccdd8fa0b0a9e57 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 28 Nov 2022 20:15:07 -0800 Subject: [PATCH 05/20] Fix rubocop lint --- app/services/search_service.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/app/services/search_service.rb b/app/services/search_service.rb index 7f69557f38..b82b66c91b 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true class SearchService < BaseService - SEARCH_ALL_VISIBLE_STATUSES = ENV['SEARCH_ALL_VISIBLE_STATUSES'] == 'true' def call(query, account, limit, options = {}) From 3fa72fd56cc325c523493ab631286a94f30910f6 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 28 Nov 2022 22:53:51 -0800 Subject: [PATCH 06/20] Don't search unlisted statuses --- .env.production.sample | 4 ++-- app/chewy/statuses_index.rb | 2 +- app/services/search_service.rb | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.env.production.sample b/.env.production.sample index 64b6e4979d..a50d301994 100644 --- a/.env.production.sample +++ b/.env.production.sample @@ -283,10 +283,10 @@ MAX_POLL_OPTION_CHARS=100 # Customize the number of hashtags shown in 'Explore' # MAX_TRENDING_TAGS=10 -# Search all visible statuses +# Search all public statuses # (Normally searches only a user's own statuses, favs, bookmarks, and mentions) # Only relevant when elasticsearch is installed -# SEARCH_ALL_VISIBLE_STATUSES=true +# SEARCH_ALL_PUBLIC_STATUSES=true # Maximum custom emoji file sizes # If undefined or smaller than MAX_EMOJI_SIZE, the value diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index a80c6eca23..2984931693 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -71,6 +71,6 @@ class StatusesIndex < Chewy::Index end field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) } - field :searchable_by_anyone, type: 'boolean', value: ->(status) { status.public_visibility? or status.unlisted_visibility? } + field :searchable_by_anyone, type: 'boolean', value: ->(status) { status.public_visibility? } end end diff --git a/app/services/search_service.rb b/app/services/search_service.rb index b82b66c91b..ca1dd855a1 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true class SearchService < BaseService - SEARCH_ALL_VISIBLE_STATUSES = ENV['SEARCH_ALL_VISIBLE_STATUSES'] == 'true' + SEARCH_ALL_PUBLIC_STATUSES = ENV['SEARCH_ALL_PUBLIC_STATUSES'] == 'true' def call(query, account, limit, options = {}) @query = query&.strip From 494d98c3a8befb285537b84a3b1e65a9d2debaf5 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 28 Nov 2022 23:26:59 -0800 Subject: [PATCH 07/20] Fix incomplete name change --- app/services/search_service.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/search_service.rb b/app/services/search_service.rb index ca1dd855a1..725b1096af 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -38,7 +38,7 @@ class SearchService < BaseService def perform_statuses_search! statuses_index = StatusesIndex.filter(term: { searchable_by: @account.id }) - if SEARCH_ALL_VISIBLE_TOOTS + if SEARCH_ALL_PUBLIC_STATUSES statuses_index = statuses_index.filter.or(term: { searchable_by_anyone: true }) end definition = parsed_query.apply(statuses_index) From 98c9bc52c0f708ea49d4cccdd12e0ffe1beff137 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Sun, 11 Dec 2022 14:24:02 -0800 Subject: [PATCH 08/20] Update indexer, add scope switch --- .env.production.sample | 11 ++++++----- app/chewy/statuses_index.rb | 5 ++++- app/lib/importer/statuses_index_importer.rb | 20 +++++++++++++++++++- app/models/status.rb | 1 + app/services/search_service.rb | 9 +++++---- config/initializers/search_scope.rb | 12 ++++++++++++ 6 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 config/initializers/search_scope.rb diff --git a/.env.production.sample b/.env.production.sample index a50d301994..e97a041260 100644 --- a/.env.production.sample +++ b/.env.production.sample @@ -238,7 +238,7 @@ SMTP_FROM_ADDRESS=notifications@example.com # SAML_ATTRIBUTES_STATEMENTS_FULL_NAME="urn:oid:2.16.840.1.113730.3.1.241" # SAML_ATTRIBUTES_STATEMENTS_FIRST_NAME="urn:oid:2.5.4.42" # SAML_ATTRIBUTES_STATEMENTS_LAST_NAME="urn:oid:2.5.4.4" -# SAML_UID_ATTRIBUTE="urn:oid:0.9.2342.19200300.100.1.1" +# SAML_UID_ATTRIBUTE="urn:oid:0.9.2342.19200300.100.1.1" # SAML_ATTRIBUTES_STATEMENTS_VERIFIED= # SAML_ATTRIBUTES_STATEMENTS_VERIFIED_EMAIL= @@ -283,10 +283,11 @@ MAX_POLL_OPTION_CHARS=100 # Customize the number of hashtags shown in 'Explore' # MAX_TRENDING_TAGS=10 -# Search all public statuses -# (Normally searches only a user's own statuses, favs, bookmarks, and mentions) -# Only relevant when elasticsearch is installed -# SEARCH_ALL_PUBLIC_STATUSES=true +# Scope of full-text searches: +# - public: search any status with public visibility +# - public_or_unlisted: search any status with public or unlisted visibility +# - classic: searches only a user's own statuses, favs, bookmarks, and mentions +# SEARCH_SCOPE=public # Maximum custom emoji file sizes # If undefined or smaller than MAX_EMOJI_SIZE, the value diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 2984931693..0cea76813e 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -65,12 +65,15 @@ class StatusesIndex < Chewy::Index root date_detection: false do field :id, type: 'long' field :account_id, type: 'long' + field :created_at, type: 'date' + field :visibility, type: 'keyword' + field :from, type: 'keyword', value: ->(status) { status.account.domain ? [status.account.acct] : [status.account.acct, "#{status.account.username}@#{Rails.configuration.x.local_domain}"] } + field :domain, value: ->(status) { status.account.domain or Rails.configuration.x.local_domain } field :text, type: 'text', value: ->(status) { status.searchable_text } do field :stemmed, type: 'text', analyzer: 'content' end field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) } - field :searchable_by_anyone, type: 'boolean', value: ->(status) { status.public_visibility? } end end diff --git a/app/lib/importer/statuses_index_importer.rb b/app/lib/importer/statuses_index_importer.rb index 5b5153d5c8..fa11882a23 100644 --- a/app/lib/importer/statuses_index_importer.rb +++ b/app/lib/importer/statuses_index_importer.rb @@ -58,13 +58,21 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter end def scopes - [ + classic_scopes = [ local_statuses_scope, local_mentions_scope, local_favourites_scope, local_votes_scope, local_bookmarks_scope, ] + case Rails.configuration.x.search_scope + when :public + classic_scopes + [public_scope] + when :public_or_unlisted + classic_scopes + [public_or_unlisted_scope] + else + classic_scopes + end end def local_mentions_scope @@ -86,4 +94,14 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter def local_statuses_scope Status.local.select('"statuses"."id", COALESCE("statuses"."reblog_of_id", "statuses"."id") AS status_id') end + + # The `id` field in the above queries isn't used anywhere, so we leave it out of these. + + def public_scope + Status.with_public_visibility.select('"statuses"."id" AS status_id') + end + + def public_or_unlisted_scope + Status.with_public_or_unlisted_visibility.select('"statuses"."id" AS status_id') + end end diff --git a/app/models/status.rb b/app/models/status.rb index 6cfe19d238..fa1c95166f 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -99,6 +99,7 @@ class Status < ApplicationRecord scope :without_replies, -> { where('statuses.reply = FALSE OR statuses.in_reply_to_account_id = statuses.account_id') } scope :without_reblogs, -> { where('statuses.reblog_of_id IS NULL') } scope :with_public_visibility, -> { where(visibility: :public) } + scope :with_public_or_unlisted_visibility, -> { where(visibility: [:public, :unlisted]) } scope :tagged_with, ->(tag_ids) { joins(:statuses_tags).where(statuses_tags: { tag_id: tag_ids }) } scope :excluding_silenced_accounts, -> { left_outer_joins(:account).where(accounts: { silenced_at: nil }) } scope :including_silenced_accounts, -> { left_outer_joins(:account).where.not(accounts: { silenced_at: nil }) } diff --git a/app/services/search_service.rb b/app/services/search_service.rb index 725b1096af..bce0f2d6e9 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -1,8 +1,6 @@ # frozen_string_literal: true class SearchService < BaseService - SEARCH_ALL_PUBLIC_STATUSES = ENV['SEARCH_ALL_PUBLIC_STATUSES'] == 'true' - def call(query, account, limit, options = {}) @query = query&.strip @account = account @@ -38,8 +36,11 @@ class SearchService < BaseService def perform_statuses_search! statuses_index = StatusesIndex.filter(term: { searchable_by: @account.id }) - if SEARCH_ALL_PUBLIC_STATUSES - statuses_index = statuses_index.filter.or(term: { searchable_by_anyone: true }) + case Rails.configuration.x.search_scope + when :public + statuses_index = statuses_index.filter.or(term: { visibility: 'public' }) + when :public_or_unlisted + statuses_index = statuses_index.filter.or(term: { visibility: ['public', 'unlisted'] }) end definition = parsed_query.apply(statuses_index) diff --git a/config/initializers/search_scope.rb b/config/initializers/search_scope.rb new file mode 100644 index 0000000000..3024405709 --- /dev/null +++ b/config/initializers/search_scope.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +Rails.application.configure do + config.x.search_scope = case + when ENV['SEARCH_SCOPE'] == 'public' + :public + when ENV['SEARCH_SCOPE'] == 'public_or_unlisted' + :public_or_unlisted + else + :classic + end +end From 87b5f7f48576535ba0b0c8ab21257e795b4096dd Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Sun, 18 Dec 2022 16:21:32 -0800 Subject: [PATCH 09/20] Explicit ES import when AP status or account is created or updated --- app/lib/activitypub/activity/create.rb | 2 ++ app/lib/importer/statuses_index_importer.rb | 3 ++- app/services/activitypub/process_account_service.rb | 2 ++ app/services/activitypub/process_status_update_service.rb | 2 ++ app/services/search_service.rb | 2 +- 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index ebae129732..daf9fcf8e3 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -85,6 +85,8 @@ class ActivityPub::Activity::Create < ActivityPub::Activity attach_tags(@status) end + StatusesIndex.import! @status + resolve_thread(@status) fetch_replies(@status) distribute diff --git a/app/lib/importer/statuses_index_importer.rb b/app/lib/importer/statuses_index_importer.rb index fa11882a23..4bb1fd70ee 100644 --- a/app/lib/importer/statuses_index_importer.rb +++ b/app/lib/importer/statuses_index_importer.rb @@ -25,7 +25,8 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter # on the results of the filter, so this filtering happens here instead bulk.map! do |entry| new_entry = begin - if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank? + if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank? && + Rails.configuration.x.search_scope == :classic { delete: entry[:index].except(:data) } else entry diff --git a/app/services/activitypub/process_account_service.rb b/app/services/activitypub/process_account_service.rb index 99bcb38353..1c95b7055c 100644 --- a/app/services/activitypub/process_account_service.rb +++ b/app/services/activitypub/process_account_service.rb @@ -73,6 +73,8 @@ class ActivityPub::ProcessAccountService < BaseService set_fetchable_attributes! unless @options[:only_key] || @account.suspended? @account.save_with_optional_media! + + AccountsIndex.import! @account end def set_immediate_protocol_attributes! diff --git a/app/services/activitypub/process_status_update_service.rb b/app/services/activitypub/process_status_update_service.rb index a0605b1a3b..7cc307d072 100644 --- a/app/services/activitypub/process_status_update_service.rb +++ b/app/services/activitypub/process_status_update_service.rb @@ -50,6 +50,8 @@ class ActivityPub::ProcessStatusUpdateService < BaseService reset_preview_card! broadcast_updates! + + StatusesIndex.import! @status end forward_activity! if significant_changes? && @status_parser.edited_at > last_edit_date diff --git a/app/services/search_service.rb b/app/services/search_service.rb index bce0f2d6e9..a9ed121529 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -40,7 +40,7 @@ class SearchService < BaseService when :public statuses_index = statuses_index.filter.or(term: { visibility: 'public' }) when :public_or_unlisted - statuses_index = statuses_index.filter.or(term: { visibility: ['public', 'unlisted'] }) + statuses_index = statuses_index.filter.or(terms: { visibility: ['public', 'unlisted'] }) end definition = parsed_query.apply(statuses_index) From ac891d9dcecb305965baffdb88ef83ff7105b0d6 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Sun, 18 Dec 2022 16:34:07 -0800 Subject: [PATCH 10/20] Don't update status inside a lock, it's just queueing anyway --- app/services/activitypub/process_status_update_service.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/services/activitypub/process_status_update_service.rb b/app/services/activitypub/process_status_update_service.rb index 7cc307d072..eecd942d36 100644 --- a/app/services/activitypub/process_status_update_service.rb +++ b/app/services/activitypub/process_status_update_service.rb @@ -50,10 +50,10 @@ class ActivityPub::ProcessStatusUpdateService < BaseService reset_preview_card! broadcast_updates! - - StatusesIndex.import! @status end + StatusesIndex.import! @status + forward_activity! if significant_changes? && @status_parser.edited_at > last_edit_date end From 9474bb27fc2af69f98f86d3ccca429695f5384f5 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Sun, 18 Dec 2022 22:39:08 -0800 Subject: [PATCH 11/20] The id column is actually used: something in the bulk import requires that the primary key column be in the custom select --- app/lib/importer/statuses_index_importer.rb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/app/lib/importer/statuses_index_importer.rb b/app/lib/importer/statuses_index_importer.rb index 4bb1fd70ee..1c62a5e708 100644 --- a/app/lib/importer/statuses_index_importer.rb +++ b/app/lib/importer/statuses_index_importer.rb @@ -96,13 +96,11 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter Status.local.select('"statuses"."id", COALESCE("statuses"."reblog_of_id", "statuses"."id") AS status_id') end - # The `id` field in the above queries isn't used anywhere, so we leave it out of these. - def public_scope - Status.with_public_visibility.select('"statuses"."id" AS status_id') + Status.with_public_visibility.select('"statuses"."id", "statuses"."id" AS status_id') end def public_or_unlisted_scope - Status.with_public_or_unlisted_visibility.select('"statuses"."id" AS status_id') + Status.with_public_or_unlisted_visibility.select('"statuses"."id", "statuses"."id" AS status_id') end end From 844a1b8476a7a282c99098e3e91ed664e80eb4e5 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 19 Dec 2022 13:17:12 -0800 Subject: [PATCH 12/20] parallel gem no longer required --- Gemfile | 1 - Gemfile.lock | 1 - 2 files changed, 2 deletions(-) diff --git a/Gemfile b/Gemfile index d43496a2f2..9843d1cee9 100644 --- a/Gemfile +++ b/Gemfile @@ -64,7 +64,6 @@ gem 'nokogiri', '~> 1.13' gem 'nsa', '~> 0.2' gem 'oj', '~> 3.13' gem 'ox', '~> 2.14' -gem 'parallel', '~> 1.22' gem 'parslet' gem 'posix-spawn' gem 'pundit', '~> 2.2' diff --git a/Gemfile.lock b/Gemfile.lock index 99d29f342d..237edb61ce 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -804,7 +804,6 @@ DEPENDENCIES omniauth-rails_csrf_protection (~> 0.1) omniauth-saml (~> 1.10) ox (~> 2.14) - parallel (~> 1.22) parslet pg (~> 1.4) pghero (~> 2.8) From 3ea4a5ed29b4ae06c9bfd1d8561190d69b0e7242 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 19 Dec 2022 13:17:29 -0800 Subject: [PATCH 13/20] Fix rubocop complaint --- app/lib/importer/statuses_index_importer.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/lib/importer/statuses_index_importer.rb b/app/lib/importer/statuses_index_importer.rb index 1c62a5e708..795bb057c5 100644 --- a/app/lib/importer/statuses_index_importer.rb +++ b/app/lib/importer/statuses_index_importer.rb @@ -25,8 +25,9 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter # on the results of the filter, so this filtering happens here instead bulk.map! do |entry| new_entry = begin - if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank? && - Rails.configuration.x.search_scope == :classic + if entry[:index] && + entry.dig(:index, :data, 'searchable_by').blank? && + Rails.configuration.x.search_scope == :classic { delete: entry[:index].except(:data) } else entry From 40519dac1ae36a5fdf7463585b3cdba173f02f61 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 19 Dec 2022 13:24:43 -0800 Subject: [PATCH 14/20] Remove index fields that are not yet used --- app/chewy/statuses_index.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 0cea76813e..d779a226a2 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -65,10 +65,7 @@ class StatusesIndex < Chewy::Index root date_detection: false do field :id, type: 'long' field :account_id, type: 'long' - field :created_at, type: 'date' field :visibility, type: 'keyword' - field :from, type: 'keyword', value: ->(status) { status.account.domain ? [status.account.acct] : [status.account.acct, "#{status.account.username}@#{Rails.configuration.x.local_domain}"] } - field :domain, value: ->(status) { status.account.domain or Rails.configuration.x.local_domain } field :text, type: 'text', value: ->(status) { status.searchable_text } do field :stemmed, type: 'text', analyzer: 'content' From 26d465175aee2aab7190f2eb9fa02a2a752b571b Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Fri, 30 Dec 2022 12:48:23 -0800 Subject: [PATCH 15/20] Add sort operator, default to newest first --- app/chewy/statuses_index.rb | 1 + app/lib/search_query_transformer.rb | 34 ++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index d779a226a2..e6f9a04c30 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -65,6 +65,7 @@ class StatusesIndex < Chewy::Index root date_detection: false do field :id, type: 'long' field :account_id, type: 'long' + field :created_at, type: 'date' field :visibility, type: 'keyword' field :text, type: 'text', value: ->(status) { status.searchable_text } do diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb index aef05e9d9d..d1671459c1 100644 --- a/app/lib/search_query_transformer.rb +++ b/app/lib/search_query_transformer.rb @@ -2,7 +2,7 @@ class SearchQueryTransformer < Parslet::Transform class Query - attr_reader :should_clauses, :must_not_clauses, :must_clauses, :filter_clauses + attr_reader :should_clauses, :must_not_clauses, :must_clauses, :filter_clauses, :order_clauses def initialize(clauses) grouped = clauses.chunk(&:operator).to_h @@ -10,6 +10,7 @@ class SearchQueryTransformer < Parslet::Transform @must_not_clauses = grouped.fetch(:must_not, []) @must_clauses = grouped.fetch(:must, []) @filter_clauses = grouped.fetch(:filter, []) + @order_clauses = grouped.fetch(:order, []) end def apply(search) @@ -17,6 +18,12 @@ class SearchQueryTransformer < Parslet::Transform must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) } must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) } filter_clauses.each { |clause| search = search.filter(**clause_to_filter(clause)) } + if order_clauses.empty? + # Default to most recent results first. + search = search.order(created_at: :desc) + else + order_clauses.each { |clause| search = search.order(**clause_to_order(clause)) } + end search.query.minimum_should_match(1) end @@ -41,6 +48,15 @@ class SearchQueryTransformer < Parslet::Transform raise "Unexpected clause type: #{clause}" end end + + def clause_to_order(clause) + case clause + when PrefixClause + { clause.term => clause.order } + else + raise "Unexpected clause type: #{clause}" + end + end end class Operator @@ -81,12 +97,12 @@ class SearchQueryTransformer < Parslet::Transform end class PrefixClause - attr_reader :filter, :operator, :term + attr_reader :filter, :operator, :term, :order def initialize(prefix, term) - @operator = :filter case prefix when 'from' + @operator = :filter @filter = :account_id username, domain = term.gsub(/\A@/, '').split('@') @@ -94,6 +110,18 @@ class SearchQueryTransformer < Parslet::Transform account = Account.find_remote!(username, domain) @term = account.id + when 'sort' + @operator = :order + @term = :created_at + + case term + when 'oldest' + @order = :asc + when 'newest' + @order = :desc + else + raise Mastodon::SyntaxError + end else raise Mastodon::SyntaxError end From bf34294fcb4e184a5af9fb6f18d663d647d8557a Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Fri, 30 Dec 2022 15:11:27 -0800 Subject: [PATCH 16/20] Add is:, has:, domain:, lang: search filters --- app/chewy/statuses_index.rb | 4 ++++ app/lib/search_query_parser.rb | 2 +- app/lib/search_query_transformer.rb | 21 ++++++++++++++++++--- app/models/status.rb | 20 ++++++++++++++++++++ app/services/fetch_link_card_service.rb | 8 ++++++++ 5 files changed, 51 insertions(+), 4 deletions(-) diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index e6f9a04c30..17df16b612 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -67,6 +67,10 @@ class StatusesIndex < Chewy::Index field :account_id, type: 'long' field :created_at, type: 'date' field :visibility, type: 'keyword' + field :domain, type: 'keyword', value: ->(status) { status.account.domain or Rails.configuration.x.local_domain } + field :lang, type: 'keyword', value: ->(status) { status.language } + field :is, type: 'keyword', value: ->(status) { status.searchable_is } + field :has, type: 'keyword', value: ->(status) { status.searchable_has } field :text, type: 'text', value: ->(status) { status.searchable_text } do field :stemmed, type: 'text', analyzer: 'content' diff --git a/app/lib/search_query_parser.rb b/app/lib/search_query_parser.rb index 15956d4cfd..5d6ffbf29d 100644 --- a/app/lib/search_query_parser.rb +++ b/app/lib/search_query_parser.rb @@ -9,7 +9,7 @@ class SearchQueryParser < Parslet::Parser rule(:prefix) { (term >> colon).as(:prefix) } rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) } rule(:phrase) { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) } - rule(:clause) { (prefix.maybe >> operator.maybe >> (phrase | term | shortcode)).as(:clause) } + rule(:clause) { (operator.maybe >> prefix.maybe >> (phrase | term | shortcode)).as(:clause) } rule(:query) { (clause >> space.maybe).repeat.as(:query) } root(:query) end diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb index d1671459c1..4cba60aaea 100644 --- a/app/lib/search_query_transformer.rb +++ b/app/lib/search_query_transformer.rb @@ -35,6 +35,8 @@ class SearchQueryTransformer < Parslet::Transform { multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } } when PhraseClause { match_phrase: { text: { query: clause.phrase } } } + when PrefixClause + { term: { clause.filter => clause.term } } else raise "Unexpected clause type: #{clause}" end @@ -99,10 +101,21 @@ class SearchQueryTransformer < Parslet::Transform class PrefixClause attr_reader :filter, :operator, :term, :order - def initialize(prefix, term) + def initialize(prefix, operator, term) + case operator + when '+', nil + @operator = :filter + when '-' + @operator = :must_not + else + raise "Unknown operator: #{str}" + end + case prefix + when 'domain', 'is', 'has', 'lang' + @filter = prefix.to_s + @term = term when 'from' - @operator = :filter @filter = :account_id username, domain = term.gsub(/\A@/, '').split('@') @@ -111,6 +124,8 @@ class SearchQueryTransformer < Parslet::Transform @term = account.id when 'sort' + raise Mastodon::SyntaxError unless operator.nil? + @operator = :order @term = :created_at @@ -133,7 +148,7 @@ class SearchQueryTransformer < Parslet::Transform operator = clause[:operator]&.to_s if clause[:prefix] - PrefixClause.new(prefix, clause[:term].to_s) + PrefixClause.new(prefix, operator, clause[:term].to_s) elsif clause[:term] TermClause.new(prefix, operator, clause[:term].to_s) elsif clause[:shortcode] diff --git a/app/models/status.rb b/app/models/status.rb index fa1c95166f..4b5dcc5111 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -173,6 +173,26 @@ class Status < ApplicationRecord ].compact.join("\n\n") end + def searchable_is + keywords = [] + keywords << :bot if account.bot? + keywords << :local if local? + keywords << :local_only if local_only + keywords << :reply if reply? + keywords + end + + def searchable_has + keywords = [] + keywords << :cw if spoiler_text? + keywords << :link if FetchLinkCardService.new.link?(self) + keywords << :media if media_attachments.present? + keywords << :mention if mentions.present? + keywords << :poll if preloadable_poll.present? + keywords << :tag if tags.present? + keywords + end + def to_log_human_identifier account.acct end diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index e5b5b730ec..86fe4d96b9 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -34,6 +34,14 @@ class FetchLinkCardService < BaseService nil end + ## + # Borrow most of this machinery to detect whether the status has at least one link. + def link?(status) + @status = status + @original_url = parse_urls + !@original_url.nil? + end + private def process_url From a6ee508eb75ffae2423422dcffa50ed23c8117ed Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Fri, 30 Dec 2022 15:58:34 -0800 Subject: [PATCH 17/20] Add visibility: and is:sensitive filters --- app/lib/search_query_transformer.rb | 2 +- app/models/status.rb | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb index 4cba60aaea..702f29a01a 100644 --- a/app/lib/search_query_transformer.rb +++ b/app/lib/search_query_transformer.rb @@ -112,7 +112,7 @@ class SearchQueryTransformer < Parslet::Transform end case prefix - when 'domain', 'is', 'has', 'lang' + when 'domain', 'is', 'has', 'lang', 'visibility' @filter = prefix.to_s @term = term when 'from' diff --git a/app/models/status.rb b/app/models/status.rb index 4b5dcc5111..02b53da096 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -179,6 +179,7 @@ class Status < ApplicationRecord keywords << :local if local? keywords << :local_only if local_only keywords << :reply if reply? + keywords << :sensitive if sensitive? keywords end From 30faf1f6daeba07e6ff32b0e2dc331fcc07fdafa Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Sun, 1 Jan 2023 15:30:13 -0800 Subject: [PATCH 18/20] Add before: and after: filters --- app/lib/search_query_transformer.rb | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb index 702f29a01a..6fc2b2383e 100644 --- a/app/lib/search_query_transformer.rb +++ b/app/lib/search_query_transformer.rb @@ -45,7 +45,7 @@ class SearchQueryTransformer < Parslet::Transform def clause_to_filter(clause) case clause when PrefixClause - { term: { clause.filter => clause.term } } + { clause.query => { clause.filter => clause.term } } else raise "Unexpected clause type: #{clause}" end @@ -99,9 +99,11 @@ class SearchQueryTransformer < Parslet::Transform end class PrefixClause - attr_reader :filter, :operator, :term, :order + attr_reader :filter, :operator, :term, :order, :query def initialize(prefix, operator, term) + @query = :term + case operator when '+', nil @operator = :filter @@ -115,6 +117,17 @@ class SearchQueryTransformer < Parslet::Transform when 'domain', 'is', 'has', 'lang', 'visibility' @filter = prefix.to_s @term = term + when 'before', 'after' + @query = :range + @filter = 'created_at' + case prefix + when 'before' + @term = { lt: term } + when 'after' + @term = { gt: term } + else + raise Mastodon::SyntaxError + end when 'from' @filter = :account_id From d1797b437f9a62e72430e01364556f82ac9a92df Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Sun, 1 Jan 2023 16:31:40 -0800 Subject: [PATCH 19/20] Add scope:classic filter --- app/lib/search_query_transformer.rb | 13 +++++++++---- app/services/search_service.rb | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb index 6fc2b2383e..a6c0c2f2d4 100644 --- a/app/lib/search_query_transformer.rb +++ b/app/lib/search_query_transformer.rb @@ -13,11 +13,11 @@ class SearchQueryTransformer < Parslet::Transform @order_clauses = grouped.fetch(:order, []) end - def apply(search) + def apply(search, account) should_clauses.each { |clause| search = search.query.should(clause_to_query(clause)) } must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) } must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) } - filter_clauses.each { |clause| search = search.filter(**clause_to_filter(clause)) } + filter_clauses.each { |clause| search = search.filter(**clause_to_filter(clause, account)) } if order_clauses.empty? # Default to most recent results first. search = search.order(created_at: :desc) @@ -42,10 +42,10 @@ class SearchQueryTransformer < Parslet::Transform end end - def clause_to_filter(clause) + def clause_to_filter(clause, account) case clause when PrefixClause - { clause.query => { clause.filter => clause.term } } + { clause.query => { clause.filter => clause.term == :account_id_placeholder ? account.id : clause.term } } else raise "Unexpected clause type: #{clause}" end @@ -136,6 +136,11 @@ class SearchQueryTransformer < Parslet::Transform account = Account.find_remote!(username, domain) @term = account.id + when 'scope' + raise Mastodon::SyntaxError unless operator.nil? + raise Mastodon::SyntaxError unless term == 'classic' + @filter = 'searchable_by' + @term = :account_id_placeholder when 'sort' raise Mastodon::SyntaxError unless operator.nil? diff --git a/app/services/search_service.rb b/app/services/search_service.rb index a9ed121529..673cf34b82 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -42,7 +42,7 @@ class SearchService < BaseService when :public_or_unlisted statuses_index = statuses_index.filter.or(terms: { visibility: ['public', 'unlisted'] }) end - definition = parsed_query.apply(statuses_index) + definition = parsed_query.apply(statuses_index, @account) if @options[:account_id].present? definition = definition.filter(term: { account_id: @options[:account_id] }) From b342b12dbd5a424129e4e2466de157341fbde7d9 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 30 Jan 2023 21:18:15 -0800 Subject: [PATCH 20/20] Don't import if ES is not enabled --- app/lib/activitypub/activity/create.rb | 2 +- app/services/activitypub/process_account_service.rb | 2 +- app/services/activitypub/process_status_update_service.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index daf9fcf8e3..344f24b521 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -85,7 +85,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity attach_tags(@status) end - StatusesIndex.import! @status + StatusesIndex.import! @status if Chewy.enabled? resolve_thread(@status) fetch_replies(@status) diff --git a/app/services/activitypub/process_account_service.rb b/app/services/activitypub/process_account_service.rb index 1c95b7055c..d536486f02 100644 --- a/app/services/activitypub/process_account_service.rb +++ b/app/services/activitypub/process_account_service.rb @@ -74,7 +74,7 @@ class ActivityPub::ProcessAccountService < BaseService @account.save_with_optional_media! - AccountsIndex.import! @account + AccountsIndex.import! @account if Chewy.enabled? end def set_immediate_protocol_attributes! diff --git a/app/services/activitypub/process_status_update_service.rb b/app/services/activitypub/process_status_update_service.rb index eecd942d36..fb16114f92 100644 --- a/app/services/activitypub/process_status_update_service.rb +++ b/app/services/activitypub/process_status_update_service.rb @@ -52,7 +52,7 @@ class ActivityPub::ProcessStatusUpdateService < BaseService broadcast_updates! end - StatusesIndex.import! @status + StatusesIndex.import! @status if Chewy.enabled? forward_activity! if significant_changes? && @status_parser.edited_at > last_edit_date end