Change files to be deleted in batches instead of one-by-one (#23302)
This commit is contained in:
		
							parent
							
								
									ff9ed824db
								
							
						
					
					
						commit
						bcee25e36c
					
				
					 3 changed files with 114 additions and 29 deletions
				
			
		
							
								
								
									
										103
									
								
								app/lib/attachment_batch.rb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								app/lib/attachment_batch.rb
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,103 @@
 | 
				
			||||||
 | 
					# frozen_string_literal: true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Removes the stored files belonging to a batch of records of one model
					# class, then deletes the rows (#delete) or nulls out their attachment
					# columns (#clear) with a single bulk query instead of one per record.
					#
					# The storage backend is read from Paperclip's default options. On :s3 the
					# object keys are collected and removed through bulk delete calls; on
					# :filesystem and :fog each file is removed individually.
					class AttachmentBatch
					  # Maximum number of objects that can be deleted in one S3 API call.
					  # It's important to remember that this does not correspond to the
					  # number of records in the batch, since records can have multiple
					  # attachments.
					  LIMIT = 1_000

					  # Attributes generated and maintained by Paperclip (not all of them
					  # are always used on every class, however)
					  NULLABLE_ATTRIBUTES = %w(
					    file_name
					    content_type
					    file_size
					    fingerprint
					    created_at
					    updated_at
					  ).freeze

					  # Styles that are always present even when not explicitly defined
					  BASE_STYLES = %i(original).freeze

					  attr_reader :klass, :records, :storage_mode

					  # @param klass   model class; must respond to +attachment_definitions+,
					  #                +where+ and (for #clear) +column_names+
					  # @param records already-loaded instances of +klass+ forming the batch
					  def initialize(klass, records)
					    @klass            = klass
					    @records          = records
					    @storage_mode     = Paperclip::Attachment.default_options[:storage]
					    @attachment_names = klass.attachment_definitions.keys
					  end

					  # Removes the files of every record in the batch, then deletes the
					  # rows themselves with a single +delete_all+.
					  def delete
					    remove_files
					    batch.delete_all
					  end

					  # Removes the files of every record in the batch, then sets the
					  # attachment columns of those rows to NULL with a single +update_all+.
					  def clear
					    remove_files
					    batch.update_all(nullified_attributes) # rubocop:disable Rails/SkipsModelValidations
					  end

					  private

					  # Relation matching exactly the records of this batch, by id.
					  def batch
					    klass.where(id: records.map(&:id))
					  end

					  # Removes every style of every non-blank attachment on every record.
					  # Storage modes other than :s3, :filesystem and :fog are ignored.
					  def remove_files
					    keys = []

					    logger.debug { "Preparing to delete attachments for #{records.size} records" }

					    records.each do |record|
					      @attachment_names.each do |attachment_name|
					        attachment = record.public_send(attachment_name)

					        # Nothing is stored for a blank attachment, so skip it before
					        # doing any per-style work.
					        next if attachment.blank?

					        styles = BASE_STYLES | attachment.styles.keys

					        styles.each do |style|
					          case @storage_mode
					          when :s3
					            logger.debug { "Adding #{attachment.path(style)} to batch for deletion" }
					            keys << attachment.style_name_as_path(style)
					          when :filesystem
					            logger.debug { "Deleting #{attachment.path(style)}" }
					            # force = true: a style whose file is already gone (or was
					            # never generated) must not abort the whole batch and leave
					            # the database rows untouched.
					            FileUtils.remove_file(attachment.path(style), true)
					          when :fog
					            logger.debug { "Deleting #{attachment.path(style)}" }
					            attachment.directory.files.new(key: attachment.path(style)).destroy
					          end
					        end
					      end
					    end

					    delete_s3_objects(keys) if storage_mode == :s3
					  end

					  # We can batch deletes over S3, but there is a limit of how many
					  # objects can be processed at once, so we have to potentially
					  # separate them into multiple calls.
					  def delete_s3_objects(keys)
					    keys.each_slice(LIMIT) do |keys_slice|
					      logger.debug { "Deleting #{keys_slice.size} objects" }

					      bucket.delete_objects(delete: {
					        objects: keys_slice.map { |key| { key: key } },
					        quiet: true,
					      })
					    end
					  end

					  # Bucket taken from the first attachment of the first record and
					  # memoized. Only reached when there is at least one key to delete,
					  # hence at least one record.
					  def bucket
					    @bucket ||= records.first.public_send(@attachment_names.first).s3_bucket
					  end

					  # Hash mapping every "<attachment>_<attribute>" column that actually
					  # exists on the model to nil, suitable for +update_all+.
					  def nullified_attributes
					    columns = @attachment_names.flat_map do |attachment_name|
					      NULLABLE_ATTRIBUTES.map { |attribute| "#{attachment_name}_#{attribute}" } & klass.column_names
					    end

					    columns.index_with(nil)
					  end

					  def logger
					    Rails.logger
					  end
					end
 | 
				
			||||||
| 
						 | 
					@ -15,15 +15,15 @@ class Vacuum::MediaAttachmentsVacuum
 | 
				
			||||||
  private
 | 
					  private
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def vacuum_cached_files!
 | 
					  def vacuum_cached_files!
 | 
				
			||||||
    media_attachments_past_retention_period.find_each do |media_attachment|
 | 
					    media_attachments_past_retention_period.find_in_batches do |media_attachments|
 | 
				
			||||||
      media_attachment.file.destroy
 | 
					      AttachmentBatch.new(MediaAttachment, media_attachments).clear
 | 
				
			||||||
      media_attachment.thumbnail.destroy
 | 
					 | 
				
			||||||
      media_attachment.save
 | 
					 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def vacuum_orphaned_records!
 | 
					  def vacuum_orphaned_records!
 | 
				
			||||||
    orphaned_media_attachments.in_batches.destroy_all
 | 
					    orphaned_media_attachments.find_in_batches do |media_attachments|
 | 
				
			||||||
 | 
					      AttachmentBatch.new(MediaAttachment, media_attachments).delete
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def media_attachments_past_retention_period
 | 
					  def media_attachments_past_retention_period
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,14 +10,6 @@ class ClearDomainMediaService < BaseService
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private
 | 
					  private
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def invalidate_association_caches!(status_ids)
 | 
					 | 
				
			||||||
    # Normally, associated models of a status are immutable (except for accounts)
 | 
					 | 
				
			||||||
    # so they are aggressively cached. After updating the media attachments to no
 | 
					 | 
				
			||||||
    # longer point to a local file, we need to clear the cache to make those
 | 
					 | 
				
			||||||
    # changes appear in the API and UI
 | 
					 | 
				
			||||||
    Rails.cache.delete_multi(status_ids.map { |id| "statuses/#{id}" })
 | 
					 | 
				
			||||||
  end
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  def clear_media!
 | 
					  def clear_media!
 | 
				
			||||||
    clear_account_images!
 | 
					    clear_account_images!
 | 
				
			||||||
    clear_account_attachments!
 | 
					    clear_account_attachments!
 | 
				
			||||||
| 
						 | 
					@ -25,31 +17,21 @@ class ClearDomainMediaService < BaseService
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def clear_account_images!
 | 
					  def clear_account_images!
 | 
				
			||||||
    blocked_domain_accounts.reorder(nil).find_each do |account|
 | 
					    blocked_domain_accounts.reorder(nil).find_in_batches do |accounts|
 | 
				
			||||||
      account.avatar.destroy if account.avatar&.exists?
 | 
					      AttachmentBatch.new(Account, accounts).clear
 | 
				
			||||||
      account.header.destroy if account.header&.exists?
 | 
					 | 
				
			||||||
      account.save
 | 
					 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def clear_account_attachments!
 | 
					  def clear_account_attachments!
 | 
				
			||||||
    media_from_blocked_domain.reorder(nil).find_in_batches do |attachments|
 | 
					    media_from_blocked_domain.reorder(nil).find_in_batches do |attachments|
 | 
				
			||||||
      affected_status_ids = []
 | 
					      AttachmentBatch.new(MediaAttachment, attachments).clear
 | 
				
			||||||
 | 
					 | 
				
			||||||
      attachments.each do |attachment|
 | 
					 | 
				
			||||||
        affected_status_ids << attachment.status_id if attachment.status_id.present?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        attachment.file.destroy if attachment.file&.exists?
 | 
					 | 
				
			||||||
        attachment.type = :unknown
 | 
					 | 
				
			||||||
        attachment.save
 | 
					 | 
				
			||||||
      end
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      invalidate_association_caches!(affected_status_ids) unless affected_status_ids.empty?
 | 
					 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def clear_emojos!
 | 
					  def clear_emojos!
 | 
				
			||||||
    emojis_from_blocked_domains.destroy_all
 | 
					    emojis_from_blocked_domains.find_in_batches do |custom_emojis|
 | 
				
			||||||
 | 
					      AttachmentBatch.new(CustomEmoji, custom_emojis).delete
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def blocked_domain
 | 
					  def blocked_domain
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue