Improve account deletion performances further (#15407)
* Delete status records in batches of 50
* Do not precompute values that are only used once
* Do not generate Redis events for removal of public toots older than two weeks
* Filter reported toots a priori for polls and status deletion
* Do not process reblogs when cleaning up public timelines

  As in Mastodon proper, reblogs don't appear in public TLs
* Clean the deleted account's own feed in one go
* Refactor Account#clean_feed_manager and List#clean_feed_manager
* Delete instead of destroy a few more associations
* Fix preloading

Co-authored-by: Claire <claire.github-309c@sitedethib.com>
This commit is contained in:
parent
58e0d8924b
commit
8d75aa630f
7 changed files with 53 additions and 81 deletions
|
@ -230,6 +230,36 @@ class FeedManager
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Wipe several feeds of the same type in two pipelined passes.
#
# Pass one deletes each feed's own key and its 'reblogs' tracking key,
# reading the tracking set's members just before it is removed. Pass two
# deletes the per-status "reblogs:<id>" key for every member read in
# pass one.
#
# @param [Symbol] type
# @param [Array<Integer>] ids
# @return [void]
def clean_feeds!(type, ids)
  pending_members = {}

  redis.pipelined do
    ids.each do |id|
      redis.del(key(type, id))

      tracking_key = key(type, id, 'reblogs')

      # Queued inside the pipeline, so what we store here is a future:
      # nothing blocks now, and the members are read through #value
      # once this pipeline has been executed.
      pending_members[id] = redis.zrange(tracking_key, 0, -1)
      redis.del(tracking_key)
    end
  end

  # The tracking sets are gone; now drop every per-status key that
  # they used to reference.
  redis.pipelined do
    pending_members.each_pair do |id, members|
      members.value.each do |status_id|
        redis.del(key(type, id, "reblogs:#{status_id}"))
      end
    end
  end
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
# Trim a feed to maximum size by removing older items
|
# Trim a feed to maximum size by removing older items
|
||||||
|
|
|
@ -578,17 +578,6 @@ class Account < ApplicationRecord
|
||||||
end
|
end
|
||||||
|
|
||||||
def clean_feed_manager
|
def clean_feed_manager
|
||||||
reblog_key = FeedManager.instance.key(:home, id, 'reblogs')
|
FeedManager.instance.clean_feeds!(:home, [id])
|
||||||
reblogged_id_set = Redis.current.zrange(reblog_key, 0, -1)
|
|
||||||
|
|
||||||
Redis.current.pipelined do
|
|
||||||
Redis.current.del(FeedManager.instance.key(:home, id))
|
|
||||||
Redis.current.del(reblog_key)
|
|
||||||
|
|
||||||
reblogged_id_set.each do |reblogged_id|
|
|
||||||
reblog_set_key = FeedManager.instance.key(:home, id, "reblogs:#{reblogged_id}")
|
|
||||||
Redis.current.del(reblog_set_key)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -34,17 +34,6 @@ class List < ApplicationRecord
|
||||||
private
|
private
|
||||||
|
|
||||||
def clean_feed_manager
|
def clean_feed_manager
|
||||||
reblog_key = FeedManager.instance.key(:list, id, 'reblogs')
|
FeedManager.instance.clean_feeds!(:list, [id])
|
||||||
reblogged_id_set = Redis.current.zrange(reblog_key, 0, -1)
|
|
||||||
|
|
||||||
Redis.current.pipelined do
|
|
||||||
Redis.current.del(FeedManager.instance.key(:list, id))
|
|
||||||
Redis.current.del(reblog_key)
|
|
||||||
|
|
||||||
reblogged_id_set.each do |reblogged_id|
|
|
||||||
reblog_set_key = FeedManager.instance.key(:list, id, "reblogs:#{reblogged_id}")
|
|
||||||
Redis.current.del(reblog_set_key)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -8,7 +8,7 @@ class BatchedRemoveStatusService < BaseService
|
||||||
# @param [Hash] options
|
# @param [Hash] options
|
||||||
# @option [Boolean] :skip_side_effects Do not modify feeds and send updates to streaming API
|
# @option [Boolean] :skip_side_effects Do not modify feeds and send updates to streaming API
|
||||||
def call(statuses, **options)
|
def call(statuses, **options)
|
||||||
ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account])
|
ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, :tags, reblogs: :account])
|
||||||
|
|
||||||
statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs }
|
statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs }
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ class BatchedRemoveStatusService < BaseService
|
||||||
# transaction lock the database, but we use the delete method instead
|
# transaction lock the database, but we use the delete method instead
|
||||||
# of destroy to avoid all callbacks. We rely on foreign keys to
|
# of destroy to avoid all callbacks. We rely on foreign keys to
|
||||||
# cascade the delete faster without loading the associations.
|
# cascade the delete faster without loading the associations.
|
||||||
statuses_and_reblogs.each(&:delete)
|
statuses_and_reblogs.each_slice(50) { |slice| Status.where(id: slice.map(&:id)).delete_all }
|
||||||
|
|
||||||
# Since we skipped all callbacks, we also need to manually
|
# Since we skipped all callbacks, we also need to manually
|
||||||
# deindex the statuses
|
# deindex the statuses
|
||||||
|
@ -35,11 +35,6 @@ class BatchedRemoveStatusService < BaseService
|
||||||
|
|
||||||
return if options[:skip_side_effects]
|
return if options[:skip_side_effects]
|
||||||
|
|
||||||
ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags)
|
|
||||||
|
|
||||||
@tags = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } }
|
|
||||||
@json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) }
|
|
||||||
|
|
||||||
# Batch by source account
|
# Batch by source account
|
||||||
statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses|
|
statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses|
|
||||||
account = account_statuses.first.account
|
account = account_statuses.first.account
|
||||||
|
@ -51,8 +46,9 @@ class BatchedRemoveStatusService < BaseService
|
||||||
end
|
end
|
||||||
|
|
||||||
# Cannot be batched
|
# Cannot be batched
|
||||||
|
@status_id_cutoff = Mastodon::Snowflake.id_at(2.weeks.ago)
|
||||||
redis.pipelined do
|
redis.pipelined do
|
||||||
statuses_and_reblogs.each do |status|
|
statuses.each do |status|
|
||||||
unpush_from_public_timelines(status)
|
unpush_from_public_timelines(status)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -66,12 +62,6 @@ class BatchedRemoveStatusService < BaseService
|
||||||
FeedManager.instance.unpush_from_home(follower, status)
|
FeedManager.instance.unpush_from_home(follower, status)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return unless account.local?
|
|
||||||
|
|
||||||
statuses.each do |status|
|
|
||||||
FeedManager.instance.unpush_from_home(account, status)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def unpush_from_list_timelines(account, statuses)
|
def unpush_from_list_timelines(account, statuses)
|
||||||
|
@ -83,9 +73,9 @@ class BatchedRemoveStatusService < BaseService
|
||||||
end
|
end
|
||||||
|
|
||||||
def unpush_from_public_timelines(status)
|
def unpush_from_public_timelines(status)
|
||||||
return unless status.public_visibility?
|
return unless status.public_visibility? && status.id > @status_id_cutoff
|
||||||
|
|
||||||
payload = @json_payloads[status.id]
|
payload = Oj.dump(event: :delete, payload: status.id.to_s)
|
||||||
|
|
||||||
redis.publish('timeline:public', payload)
|
redis.publish('timeline:public', payload)
|
||||||
redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload)
|
redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload)
|
||||||
|
@ -95,7 +85,7 @@ class BatchedRemoveStatusService < BaseService
|
||||||
redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload)
|
redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload)
|
||||||
end
|
end
|
||||||
|
|
||||||
@tags[status.id].each do |hashtag|
|
status.tags.map { |tag| tag.name.mb_chars.downcase }.each do |hashtag|
|
||||||
redis.publish("timeline:hashtag:#{hashtag}", payload)
|
redis.publish("timeline:hashtag:#{hashtag}", payload)
|
||||||
redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
|
redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
|
||||||
end
|
end
|
||||||
|
|
|
@ -46,10 +46,12 @@ class DeleteAccountService < BaseService
|
||||||
featured_tags
|
featured_tags
|
||||||
follow_requests
|
follow_requests
|
||||||
identity_proofs
|
identity_proofs
|
||||||
|
list_accounts
|
||||||
migrations
|
migrations
|
||||||
mute_relationships
|
mute_relationships
|
||||||
muted_by_relationships
|
muted_by_relationships
|
||||||
notifications
|
notifications
|
||||||
|
owned_lists
|
||||||
scheduled_statuses
|
scheduled_statuses
|
||||||
status_pins
|
status_pins
|
||||||
)
|
)
|
||||||
|
@ -145,15 +147,14 @@ class DeleteAccountService < BaseService
|
||||||
purge_media_attachments!
|
purge_media_attachments!
|
||||||
purge_polls!
|
purge_polls!
|
||||||
purge_generated_notifications!
|
purge_generated_notifications!
|
||||||
|
purge_feeds!
|
||||||
purge_other_associations!
|
purge_other_associations!
|
||||||
|
|
||||||
@account.destroy unless keep_account_record?
|
@account.destroy unless keep_account_record?
|
||||||
end
|
end
|
||||||
|
|
||||||
def purge_statuses!
|
def purge_statuses!
|
||||||
@account.statuses.reorder(nil).find_in_batches do |statuses|
|
@account.statuses.reorder(nil).where.not(id: reported_status_ids).in_batches do |statuses|
|
||||||
statuses.reject! { |status| reported_status_ids.include?(status.id) } if keep_account_record?
|
|
||||||
|
|
||||||
BatchedRemoveStatusService.new.call(statuses, skip_side_effects: skip_side_effects?)
|
BatchedRemoveStatusService.new.call(statuses, skip_side_effects: skip_side_effects?)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -167,11 +168,7 @@ class DeleteAccountService < BaseService
|
||||||
end
|
end
|
||||||
|
|
||||||
def purge_polls!
|
def purge_polls!
|
||||||
@account.polls.reorder(nil).find_each do |poll|
|
@account.polls.reorder(nil).where.not(status_id: reported_status_ids).in_batches.delete_all
|
||||||
next if keep_account_record? && reported_status_ids.include?(poll.status_id)
|
|
||||||
|
|
||||||
poll.delete
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def purge_generated_notifications!
|
def purge_generated_notifications!
|
||||||
|
@ -187,6 +184,13 @@ class DeleteAccountService < BaseService
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Clear the account's home feed and the feeds of every list it owns.
# Does nothing (and returns nil) when the account is not local.
def purge_feeds!
  if @account.local?
    manager = FeedManager.instance

    manager.clean_feeds!(:home, [@account.id])
    manager.clean_feeds!(:list, @account.owned_lists.pluck(:id))
  end
end
|
||||||
|
|
||||||
def purge_profile!
|
def purge_profile!
|
||||||
# If the account is going to be destroyed
|
# If the account is going to be destroyed
|
||||||
# there is no point wasting time updating
|
# there is no point wasting time updating
|
||||||
|
|
|
@ -14,37 +14,11 @@ class Scheduler::FeedCleanupScheduler
|
||||||
private
|
private
|
||||||
|
|
||||||
def clean_home_feeds!
|
def clean_home_feeds!
|
||||||
clean_feeds!(inactive_account_ids, :home)
|
feed_manager.clean_feeds!(:home, inactive_account_ids)
|
||||||
end
|
end
|
||||||
|
|
||||||
def clean_list_feeds!
|
def clean_list_feeds!
|
||||||
clean_feeds!(inactive_list_ids, :list)
|
feed_manager.clean_feeds!(:list, inactive_list_ids)
|
||||||
end
|
|
||||||
|
|
||||||
def clean_feeds!(ids, type)
|
|
||||||
reblogged_id_sets = {}
|
|
||||||
|
|
||||||
redis.pipelined do
|
|
||||||
ids.each do |feed_id|
|
|
||||||
redis.del(feed_manager.key(type, feed_id))
|
|
||||||
reblog_key = feed_manager.key(type, feed_id, 'reblogs')
|
|
||||||
# We collect a future for this: we don't block while getting
|
|
||||||
# it, but we can iterate over it later.
|
|
||||||
reblogged_id_sets[feed_id] = redis.zrange(reblog_key, 0, -1)
|
|
||||||
redis.del(reblog_key)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Remove all of the reblog tracking keys we just removed the
|
|
||||||
# references to.
|
|
||||||
redis.pipelined do
|
|
||||||
reblogged_id_sets.each do |feed_id, future|
|
|
||||||
future.value.each do |reblogged_id|
|
|
||||||
reblog_set_key = feed_manager.key(type, feed_id, "reblogs:#{reblogged_id}")
|
|
||||||
redis.del(reblog_set_key)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def inactive_account_ids
|
def inactive_account_ids
|
||||||
|
|
|
@ -43,10 +43,6 @@ RSpec.describe BatchedRemoveStatusService, type: :service do
|
||||||
expect(Redis.current).to have_received(:publish).with("timeline:#{jeff.id}", any_args).at_least(:once)
|
expect(Redis.current).to have_received(:publish).with("timeline:#{jeff.id}", any_args).at_least(:once)
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'notifies streaming API of author' do
|
|
||||||
expect(Redis.current).to have_received(:publish).with("timeline:#{alice.id}", any_args).at_least(:once)
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'notifies streaming API of public timeline' do
|
it 'notifies streaming API of public timeline' do
|
||||||
expect(Redis.current).to have_received(:publish).with('timeline:public', any_args).at_least(:once)
|
expect(Redis.current).to have_received(:publish).with('timeline:public', any_args).at_least(:once)
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue