# glitchier-soc/lib/mastodon/cli/search.rb

# frozen_string_literal: true

require_relative 'base'

module Mastodon::CLI
  # CLI commands for creating, upgrading and populating the Elasticsearch
  # indices used by search.
  class Search < Base
    # Indices are sorted by amount of data to be expected in each, so that
    # smaller indices can go online sooner
    INDICES = [
      InstancesIndex,
      AccountsIndex,
      TagsIndex,
      StatusesIndex,
    ].freeze

    option :concurrency, type: :numeric, default: 5, aliases: [:c], desc: 'Workload will be split between this number of threads'
    option :batch_size, type: :numeric, default: 100, aliases: [:b], desc: 'Number of records in each batch'
    option :only, type: :array, enum: %w(instances accounts tags statuses), desc: 'Only process these indices'
    option :import, type: :boolean, default: true, desc: 'Import data from the database to the index'
    option :clean, type: :boolean, default: true, desc: 'Remove outdated documents from the index'
    desc 'deploy', 'Create or upgrade Elasticsearch indices and populate them'
    long_desc <<~LONG_DESC
      If Elasticsearch is empty, this command will create the necessary indices
      and then import data from the database into those indices.

      This command will also upgrade indices if the underlying schema has been
      changed since the last run. Index upgrades erase index data.

      Even if creating or upgrading indices is not necessary, data from the
      database will be imported into the indices, unless overridden with --no-import.
    LONG_DESC
    # Purges and re-locks every selected index whose specification changed,
    # then runs the import and/or clean-up step of each index's importer
    # (controlled by options[:import] and options[:clean]) while reporting
    # progress. Finishes by printing how many records were indexed and
    # de-indexed.
    #
    # Exits with status 1 before doing any work when options[:concurrency]
    # or options[:batch_size] is below 1 (see verify_deploy_options!).
    def deploy
      verify_deploy_options!

      # `uniq` keeps a repeated --only value from processing the same index
      # twice and from counting its estimate twice.
      indices = if options[:only]
                  options[:only].uniq.map { |str| "#{str.camelize}Index".constantize }
                else
                  INDICES
                end

      # All importers share one bounded thread pool sized by --concurrency.
      pool      = Concurrent::FixedThreadPool.new(options[:concurrency], max_queue: options[:concurrency] * 10)
      importers = indices.index_with { |index| "Importer::#{index.name}Importer".constantize.new(batch_size: options[:batch_size], executor: pool) }
      progress  = ProgressBar.create(total: nil, format: '%t%c/%u |%b%i| %e (%r docs/s)', autofinish: false)

      # First, ensure all indices are created and have the correct
      # structure, so that live data can already be written
      indices.select { |index| index.specification.changed? }.each do |index|
        progress.title = "Upgrading #{index} "
        index.purge
        index.specification.lock!
      end

      progress.title = 'Estimating workload '
      progress.total = indices.sum { |index| importers[index].estimate! }

      reset_connection_pools!

      added   = 0
      removed = 0

      indices.each do |index|
        importer = importers[index]
        importer.optimize_for_import!

        importer.on_progress do |(indexed, deleted)|
          processed = indexed + deleted

          # The estimate is only an estimate: when the running count would
          # exceed it, the total is dropped (set to nil). The `progress.total &&`
          # guard matters on every later callback, because comparing an
          # Integer with nil raises ArgumentError.
          progress.total = nil if progress.total && progress.progress + processed > progress.total
          progress.progress += processed
          added   += indexed
          removed += deleted
        end

        importer.on_failure do |reason|
          progress.log(pastel.red("Error while importing #{index}: #{reason}"))
        end

        if options[:import]
          progress.title = "Importing #{index} "
          importer.import!
        end

        if options[:clean]
          progress.title = "Cleaning #{index} "
          importer.clean_up!
        end
      ensure
        # Runs even when the import or clean-up step raises, so the index is
        # not left in its import-optimized state.
        importer.optimize_for_search!
      end

      progress.title = 'Done! '
      progress.finish

      say("Indexed #{added} records, de-indexed #{removed}", :green, true)
    end

    private

    # Runs every option check for `deploy`; each check exits the process with
    # status 1 on an invalid value.
    def verify_deploy_options!
      verify_deploy_concurrency!
      verify_deploy_batch_size!
    end

    # Prints an error and exits with status 1 unless options[:concurrency]
    # is at least 1.
    def verify_deploy_concurrency!
      return unless options[:concurrency] < 1

      say('Cannot run with this concurrency setting, must be at least 1', :red)
      exit(1)
    end

    # Prints an error and exits with status 1 unless options[:batch_size]
    # is at least 1.
    def verify_deploy_batch_size!
      return unless options[:batch_size] < 1

      say('Cannot run with this batch_size setting, must be at least 1', :red)
      exit(1)
    end
  end
end
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 6 years ago			`# frozen_string_literal: true`

Add CLI Base class for command line code (#25106) 2 years ago			`require_relative 'base'`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 6 years ago
Move the mastodon/_cli files to mastodon/cli/ (#24139) 2 years ago			`module Mastodon::CLI`
Add CLI Base class for command line code (#25106) 2 years ago			`class Search < Base`
Change `tootctl search deploy` algorithm (#14300) 4 years ago			`# Indices are sorted by amount of data to be expected in each, so that`
			`# smaller indices can go online sooner`
			`INDICES = [`
Change interaction modal in web UI (#26075) Co-authored-by: Eugen Rochko <eugen@zeonfederated.com> 1 year ago			`InstancesIndex,`
Change `tootctl search deploy` algorithm (#14300) 4 years ago			`AccountsIndex,`
			`TagsIndex,`
			`StatusesIndex,`
			`].freeze`

Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`option :concurrency, type: :numeric, default: 5, aliases: [:c], desc: 'Workload will be split between this number of threads'`
			`option :batch_size, type: :numeric, default: 100, aliases: [:b], desc: 'Number of records in each batch'`
Add missing instances option to tootctl search deploy (#26461) 1 year ago			`option :only, type: :array, enum: %w(instances accounts tags statuses), desc: 'Only process these indices'`
Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`option :import, type: :boolean, default: true, desc: 'Import data from the database to the index'`
			`option :clean, type: :boolean, default: true, desc: 'Remove outdated documents from the index'`
Fix ElasticSearch to Elasticsearch (#17050) 3 years ago			`desc 'deploy', 'Create or upgrade Elasticsearch indices and populate them'`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 6 years ago			`long_desc <<~LONG_DESC`
Fix ElasticSearch to Elasticsearch (#17050) 3 years ago			`If Elasticsearch is empty, this command will create the necessary indices`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 6 years ago			`and then import data from the database into those indices.`

			`This command will also upgrade indices if the underlying schema has been`
Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`changed since the last run. Index upgrades erase index data.`
Add parallelization to `tootctl search deploy` (#12051) * Add parallel gem * Modify parallel option in tootctl search deploy * Add paralell option to tootctl search deploy * Change 1 to false * Clean up * Rename --parallel to --processes 5 years ago
Change `tootctl search deploy` algorithm (#14300) 4 years ago			`Even if creating or upgrading indices is not necessary, data from the`
Fix typos (#18604) * Fix typos Found via `codespell -q 3 -S ./CHANGELOG.md,./AUTHORS.md,./config/locales,./app/javascript/mastodon/locales -L ba,keypair,medias,pixelx,ro` * Follow-up typo fix 2 years ago			`database will be imported into the indices, unless overridden with --no-import.`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 6 years ago			`LONG_DESC`
			`def deploy`
Extract verify options method in search cli (#25121) 1 year ago			`verify_deploy_options!`
Add batch_size option to bin/tootctl search deploy (#17049) 3 years ago
Autofix Rubocop Style/RedundantBegin (#23703) 2 years ago			`indices = if options[:only]`
			`options[:only].map { \|str\| "#{str.camelize}Index".constantize }`
			`else`
			`INDICES`
			`end`
Change `tootctl search deploy` algorithm (#14300) 4 years ago
Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`pool = Concurrent::FixedThreadPool.new(options[:concurrency], max_queue: options[:concurrency] * 10)`
			`importers = indices.index_with { \|index\| "Importer::#{index.name}Importer".constantize.new(batch_size: options[:batch_size], executor: pool) }`
			`progress = ProgressBar.create(total: nil, format: '%t%c/%u \|%b%i\| %e (%r docs/s)', autofinish: false)`
Change `tootctl search deploy` algorithm (#14300) 4 years ago
			`# First, ensure all indices are created and have the correct`
			`# structure, so that live data can already be written`
			`indices.select { \|index\| index.specification.changed? }.each do \|index\|`
			`progress.title = "Upgrading #{index} "`
			`index.purge`
			`index.specification.lock!`
			`end`

Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`progress.title = 'Estimating workload '`
			`progress.total = indices.sum { \|index\| importers[index].estimate! }`

Fix opening and closing Redis connections instead of using a pool (#18171) * Fix opening and closing Redis connections instead of using a pool * Fix Redis connections not being returned to the pool in CLI commands 3 years ago			`reset_connection_pools!`
Change `tootctl search deploy` algorithm (#14300) 4 years ago
Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`added = 0`
			`removed = 0`
Change `tootctl search deploy` algorithm (#14300) 4 years ago
Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`indices.each do \|index\|`
			`importer = importers[index]`
			`importer.optimize_for_import!`

			`importer.on_progress do \|(indexed, deleted)\|`
			`progress.total = nil if progress.progress + indexed + deleted > progress.total`
			`progress.progress += indexed + deleted`
			`added += indexed`
			`removed += deleted`
			`end`
Change `tootctl search deploy` algorithm (#14300) 4 years ago
Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`importer.on_failure do \|reason\|`
			`progress.log(pastel.red("Error while importing #{index}: #{reason}"))`
			`end`
Change `tootctl search deploy` algorithm (#14300) 4 years ago
Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`if options[:import]`
			`progress.title = "Importing #{index} "`
			`importer.import!`
			`end`

			`if options[:clean]`
			`progress.title = "Cleaning #{index} "`
			`importer.clean_up!`
Change `tootctl search deploy` algorithm (#14300) 4 years ago			`end`
Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`ensure`
			`importer.optimize_for_search!`
Add parallelization to `tootctl search deploy` (#12051) * Add parallel gem * Modify parallel option in tootctl search deploy * Add paralell option to tootctl search deploy * Change 1 to false * Clean up * Rename --parallel to --processes 5 years ago			`end`
Change `tootctl search deploy` algorithm (#14300) 4 years ago
Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`progress.title = 'Done! '`
			`progress.finish`
Change `tootctl search deploy` algorithm (#14300) 4 years ago
Change algorithm of `tootctl search deploy` to improve performance (#18463) 3 years ago			`say("Indexed #{added} records, de-indexed #{removed}", :green, true)`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 6 years ago			`end`
Extract verify options method in search cli (#25121) 1 year ago
			`private`

			`def verify_deploy_options!`
			`verify_deploy_concurrency!`
			`verify_deploy_batch_size!`
			`end`

			`def verify_deploy_concurrency!`
			`return unless options[:concurrency] < 1`

			`say('Cannot run with this concurrency setting, must be at least 1', :red)`
			`exit(1)`
			`end`

			`def verify_deploy_batch_size!`
			`return unless options[:batch_size] < 1`

			`say('Cannot run with this batch_size setting, must be at least 1', :red)`
			`exit(1)`
			`end`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 6 years ago			`end`
			`end`