From 0769ef827768a52c1fb8e8f7ceefa388fa94e923 Mon Sep 17 00:00:00 2001
From: Eugen Rochko <>
Date: Sun, 8 Dec 2019 15:37:12 +0100
Subject: [PATCH] Add `tootctl media remove-orphans` (#12568)

 app/models/media_attachment.rb   | 12 +++++
 config/initializers/paperclip.rb |  2 +-
 lib/mastodon/media_cli.rb        | 77 ++++++++++++++++++++++++++++++++
 3 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb
index 5d5034a52c..573ef5dfc1 100644
--- a/app/models/media_attachment.rb
+++ b/app/models/media_attachment.rb
@@ -167,6 +167,18 @@ class MediaAttachment < ApplicationRecord
     audio? || video?
+  # Tells whether +other_file_name+ names this attachment's stored file or one
+  # of its converted style variants.
+  #
+  # A name counts as a variant when its extension is one of the formats
+  # declared by the file's styles and its base name (extension removed) equals
+  # the base name of +file_file_name+.
+  #
+  # @param other_file_name [String] file name to compare against this attachment
+  # @return [Boolean]
+  def variant?(other_file_name)
+    return true if file_file_name == other_file_name
+
+    # Collect the target formats of all styles; styles that declare no
+    # :format are dropped by compact.
+    formats = file.styles.values.map { |style| style[:format] }.compact
+    return false if formats.empty?
+
+    extension = File.extname(other_file_name)
+
+    formats.include?(extension.delete('.')) && File.basename(other_file_name, extension) == File.basename(file_file_name, File.extname(file_file_name))
+  end
   def to_param
diff --git a/config/initializers/paperclip.rb b/config/initializers/paperclip.rb
index 96607b7ce8..dadc492a01 100644
--- a/config/initializers/paperclip.rb
+++ b/config/initializers/paperclip.rb
@@ -89,7 +89,7 @@ else
     storage: :filesystem,
     use_timestamp: true,
-    path: ENV.fetch('PAPERCLIP_ROOT_PATH', ':rails_root/public/system') + '/:class/:attachment/:id_partition/:style/:filename',
+    path: File.join(ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')), ':class', ':attachment', ':id_partition', ':style', ':filename'),
     url: ENV.fetch('PAPERCLIP_ROOT_URL', '/system') + '/:class/:attachment/:id_partition/:style/:filename',
diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb
index 3b702f1557..96ad8556ac 100644
--- a/lib/mastodon/media_cli.rb
+++ b/lib/mastodon/media_cli.rb
@@ -44,6 +44,83 @@ module Mastodon
       say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
+    option :start_after
+    option :dry_run, type: :boolean, default: false
+    desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
+    long_desc <<~LONG_DESC
+      Scans file storage for files that do not belong to existing media attachments. Because this operation
+      requires iterating over every single file individually, it will be slow.
+      Please mind that some storage providers charge for the necessary API requests to list objects.
+    LONG_DESC
+    # Walks every file below media_attachments/files in the configured
+    # Paperclip storage and deletes those for which no MediaAttachment exists
+    # or whose name is not a variant of the matching attachment's file.
+    #
+    # Reads options[:start_after] (S3 only: key after which listing begins)
+    # and options[:dry_run] (count and report orphans without deleting).
+    # Exits with status 1 when the storage driver is :fog.
+    def remove_orphans
+      progress        = create_progress_bar(nil)
+      reclaimed_bytes = 0
+      removed         = 0
+      dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''
+
+      case Paperclip::Attachment.default_options[:storage]
+      when :s3
+        paperclip_instance = MediaAttachment.new.file
+        s3_interface       = paperclip_instance.s3_interface
+        bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
+        last_key           = options[:start_after]
+
+        # The segments between the two-part prefix and the file name are the
+        # id partition followed by the style name; once joined, to_i reads the
+        # leading digits and ignores the trailing style name.
+        id_from_key = ->(key) { key.split('/')[2..-2].join.to_i }
+
+        loop do
+          # Fetch at most 1000 objects per pass, resuming after the last key seen
+          objects = bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).to_a
+
+          break if objects.empty?
+
+          last_key        = objects.last.key
+          attachment_ids  = objects.map { |object| id_from_key.call(object.key) }
+          # One query per batch instead of one per object
+          attachments_map = MediaAttachment.where(id: attachment_ids).each_with_object({}) { |attachment, map| map[attachment.id] = attachment }
+
+          objects.each do |object|
+            attachment = attachments_map[id_from_key.call(object.key)]
+            filename   = object.key.split('/').last
+
+            progress.increment
+
+            # Keep the file when its attachment exists and recognises the name
+            next if attachment&.variant?(filename)
+
+            reclaimed_bytes += object.size
+            removed += 1
+            object.delete unless options[:dry_run]
+            progress.log("Found and removed orphan: #{object.key}")
+          end
+        end
+      when :fog
+        say('The fog storage driver is not supported for this operation at this time', :red)
+        exit(1)
+      when :filesystem
+        require 'find'
+
+        # config/initializers/paperclip.rb builds the storage path from
+        # PAPERCLIP_ROOT_PATH, so the scan has to read the same variable.
+        root_path = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)
+
+        Find.find(File.join(root_path, 'media_attachments', 'files')) do |path|
+          # Find yields directories as well; only files can be orphans
+          next if File.directory?(path)
+
+          key           = path.gsub("#{root_path}#{File::SEPARATOR}", '')
+          segments      = key.split(File::SEPARATOR)
+          attachment_id = segments[2..-2].join.to_i
+          filename      = segments.last
+          attachment    = MediaAttachment.find_by(id: attachment_id)
+
+          progress.increment
+
+          next if attachment&.variant?(filename)
+
+          reclaimed_bytes += File.size(path)
+          removed += 1
+          File.delete(path) unless options[:dry_run]
+          progress.log("Found and removed orphan: #{key}")
+        end
+      end
+
+      # The bar was created with a nil total; set the total to the number of
+      # files visited before finishing.
+      progress.total = progress.progress
+      progress.finish
+
+      say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
+    end
     option :account, type: :string
     option :domain, type: :string
     option :status, type: :numeric