Optimize archive export service and export zip files instead of gzipped tar files (#23360)
This commit is contained in:
		
							parent
							
								
									d6808a561e
								
							
						
					
					
						commit
						e964406ec3
					
				
					 4 changed files with 157 additions and 63 deletions
				
			
		
							
								
								
									
										1
									
								
								Gemfile
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								Gemfile
									
									
									
									
									
								
							|  | @ -162,3 +162,4 @@ gem 'xorcist', '~> 1.1' | ||||||
| gem 'cocoon', '~> 1.2' | gem 'cocoon', '~> 1.2' | ||||||
| 
 | 
 | ||||||
| gem 'net-http', '~> 0.3.2' | gem 'net-http', '~> 0.3.2' | ||||||
|  | gem 'rubyzip', '~> 2.3' | ||||||
|  |  | ||||||
|  | @ -636,6 +636,7 @@ GEM | ||||||
|       nokogiri (>= 1.10.5) |       nokogiri (>= 1.10.5) | ||||||
|       rexml |       rexml | ||||||
|     ruby2_keywords (0.0.5) |     ruby2_keywords (0.0.5) | ||||||
|  |     rubyzip (2.3.2) | ||||||
|     rufus-scheduler (3.8.2) |     rufus-scheduler (3.8.2) | ||||||
|       fugit (~> 1.1, >= 1.1.6) |       fugit (~> 1.1, >= 1.1.6) | ||||||
|     safety_net_attestation (0.4.0) |     safety_net_attestation (0.4.0) | ||||||
|  | @ -876,6 +877,7 @@ DEPENDENCIES | ||||||
|   rubocop-rails |   rubocop-rails | ||||||
|   rubocop-rspec |   rubocop-rspec | ||||||
|   ruby-progressbar (~> 1.13) |   ruby-progressbar (~> 1.13) | ||||||
|  |   rubyzip (~> 2.3) | ||||||
|   sanitize (~> 6.0) |   sanitize (~> 6.0) | ||||||
|   scenic (~> 1.7) |   scenic (~> 1.7) | ||||||
|   sidekiq (~> 6.5) |   sidekiq (~> 6.5) | ||||||
|  |  | ||||||
|  | @ -1,59 +1,67 @@ | ||||||
| # frozen_string_literal: true | # frozen_string_literal: true | ||||||
| 
 | 
 | ||||||
| require 'rubygems/package' | require 'zip' | ||||||
| 
 | 
 | ||||||
| class BackupService < BaseService | class BackupService < BaseService | ||||||
|   include Payloadable |   include Payloadable | ||||||
|  |   include ContextHelper | ||||||
| 
 | 
 | ||||||
|   attr_reader :account, :backup, :collection |   attr_reader :account, :backup | ||||||
| 
 | 
 | ||||||
|   def call(backup) |   def call(backup) | ||||||
|     @backup  = backup |     @backup  = backup | ||||||
|     @account = backup.user.account |     @account = backup.user.account | ||||||
| 
 | 
 | ||||||
|     build_json! |  | ||||||
|     build_archive! |     build_archive! | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   private |   private | ||||||
| 
 | 
 | ||||||
|   def build_json! |   def build_outbox_json!(file) | ||||||
|     @collection = serialize(collection_presenter, ActivityPub::CollectionSerializer) |     skeleton = serialize(collection_presenter, ActivityPub::CollectionSerializer) | ||||||
|  |     skeleton[:@context] = full_context | ||||||
|  |     skeleton[:orderedItems] = ['!PLACEHOLDER!'] | ||||||
|  |     skeleton = Oj.dump(skeleton) | ||||||
|  |     prepend, append = skeleton.split('"!PLACEHOLDER!"') | ||||||
|  |     add_comma = false | ||||||
|  | 
 | ||||||
|  |     file.write(prepend) | ||||||
| 
 | 
 | ||||||
|     account.statuses.with_includes.reorder(nil).find_in_batches do |statuses| |     account.statuses.with_includes.reorder(nil).find_in_batches do |statuses| | ||||||
|       statuses.each do |status| |       file.write(',') if add_comma | ||||||
|         item = serialize_payload(ActivityPub::ActivityPresenter.from_status(status), ActivityPub::ActivitySerializer, signer: @account) |       add_comma = true | ||||||
|         item.delete(:@context) | 
 | ||||||
|  |       file.write(statuses.map do |status| | ||||||
|  |         item = serialize_payload(ActivityPub::ActivityPresenter.from_status(status), ActivityPub::ActivitySerializer) | ||||||
|  |         item.delete('@context') | ||||||
| 
 | 
 | ||||||
|         unless item[:type] == 'Announce' || item[:object][:attachment].blank? |         unless item[:type] == 'Announce' || item[:object][:attachment].blank? | ||||||
|           item[:object][:attachment].each do |attachment| |           item[:object][:attachment].each do |attachment| | ||||||
|             attachment[:url] = Addressable::URI.parse(attachment[:url]).path.gsub(/\A\/system\//, '') |             attachment[:url] = Addressable::URI.parse(attachment[:url]).path.delete_prefix('/system/') | ||||||
|           end |           end | ||||||
|         end |         end | ||||||
| 
 | 
 | ||||||
|         @collection[:orderedItems] << item |         Oj.dump(item) | ||||||
|       end |       end.join(',')) | ||||||
| 
 | 
 | ||||||
|       GC.start |       GC.start | ||||||
|     end |     end | ||||||
|  | 
 | ||||||
|  |     file.write(append) | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   def build_archive! |   def build_archive! | ||||||
|     tmp_file = Tempfile.new(%w(archive .tar.gz)) |     tmp_file = Tempfile.new(%w(archive .zip)) | ||||||
| 
 | 
 | ||||||
|     File.open(tmp_file, 'wb') do |file| |     Zip::File.open(tmp_file, create: true) do |zipfile| | ||||||
|       Zlib::GzipWriter.wrap(file) do |gz| |       dump_outbox!(zipfile) | ||||||
|         Gem::Package::TarWriter.new(gz) do |tar| |       dump_media_attachments!(zipfile) | ||||||
|           dump_media_attachments!(tar) |       dump_likes!(zipfile) | ||||||
|           dump_outbox!(tar) |       dump_bookmarks!(zipfile) | ||||||
|           dump_likes!(tar) |       dump_actor!(zipfile) | ||||||
|           dump_bookmarks!(tar) |  | ||||||
|           dump_actor!(tar) |  | ||||||
|         end |  | ||||||
|       end |  | ||||||
|     end |     end | ||||||
| 
 | 
 | ||||||
|     archive_filename = "#{['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(16)].join('-')}.tar.gz" |     archive_filename = "#{['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(16)].join('-')}.zip" | ||||||
| 
 | 
 | ||||||
|     @backup.dump      = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename) |     @backup.dump      = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename) | ||||||
|     @backup.processed = true |     @backup.processed = true | ||||||
|  | @ -63,27 +71,28 @@ class BackupService < BaseService | ||||||
|     tmp_file.unlink |     tmp_file.unlink | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   def dump_media_attachments!(tar) |   def dump_media_attachments!(zipfile) | ||||||
|     MediaAttachment.attached.where(account: account).reorder(nil).find_in_batches do |media_attachments| |     MediaAttachment.attached.where(account: account).reorder(nil).find_in_batches do |media_attachments| | ||||||
|       media_attachments.each do |m| |       media_attachments.each do |m| | ||||||
|         next unless m.file&.path |         path = m.file&.path | ||||||
|  |         next unless path | ||||||
| 
 | 
 | ||||||
|         download_to_tar(tar, m.file, m.file.path) |         path = path.gsub(/\A.*\/system\//, '') | ||||||
|  |         path = path.gsub(/\A\/+/, '') | ||||||
|  |         download_to_zip(zipfile, m.file, path) | ||||||
|       end |       end | ||||||
| 
 | 
 | ||||||
|       GC.start |       GC.start | ||||||
|     end |     end | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   def dump_outbox!(tar) |   def dump_outbox!(zipfile) | ||||||
|     json = Oj.dump(collection) |     zipfile.get_output_stream('outbox.json') do |io| | ||||||
| 
 |       build_outbox_json!(io) | ||||||
|     tar.add_file_simple('outbox.json', 0o444, json.bytesize) do |io| |  | ||||||
|       io.write(json) |  | ||||||
|     end |     end | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   def dump_actor!(tar) |   def dump_actor!(zipfile) | ||||||
|     actor = serialize(account, ActivityPub::ActorSerializer) |     actor = serialize(account, ActivityPub::ActorSerializer) | ||||||
| 
 | 
 | ||||||
|     actor[:icon][:url]  = "avatar#{File.extname(actor[:icon][:url])}"  if actor[:icon] |     actor[:icon][:url]  = "avatar#{File.extname(actor[:icon][:url])}"  if actor[:icon] | ||||||
|  | @ -92,51 +101,66 @@ class BackupService < BaseService | ||||||
|     actor[:likes]       = 'likes.json' |     actor[:likes]       = 'likes.json' | ||||||
|     actor[:bookmarks]   = 'bookmarks.json' |     actor[:bookmarks]   = 'bookmarks.json' | ||||||
| 
 | 
 | ||||||
|     download_to_tar(tar, account.avatar, "avatar#{File.extname(account.avatar.path)}") if account.avatar.exists? |     download_to_zip(zipfile, account.avatar, "avatar#{File.extname(account.avatar.path)}") if account.avatar.exists? | ||||||
|     download_to_tar(tar, account.header, "header#{File.extname(account.header.path)}") if account.header.exists? |     download_to_zip(zipfile, account.header, "header#{File.extname(account.header.path)}") if account.header.exists? | ||||||
| 
 | 
 | ||||||
|     json = Oj.dump(actor) |     json = Oj.dump(actor) | ||||||
| 
 | 
 | ||||||
|     tar.add_file_simple('actor.json', 0o444, json.bytesize) do |io| |     zipfile.get_output_stream('actor.json') do |io| | ||||||
|       io.write(json) |       io.write(json) | ||||||
|     end |     end | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   def dump_likes!(tar) |   def dump_likes!(zipfile) | ||||||
|     collection = serialize(ActivityPub::CollectionPresenter.new(id: 'likes.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer) |     skeleton = serialize(ActivityPub::CollectionPresenter.new(id: 'likes.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer) | ||||||
|  |     skeleton.delete(:totalItems) | ||||||
|  |     skeleton[:orderedItems] = ['!PLACEHOLDER!'] | ||||||
|  |     skeleton = Oj.dump(skeleton) | ||||||
|  |     prepend, append = skeleton.split('"!PLACEHOLDER!"') | ||||||
| 
 | 
 | ||||||
|     Status.reorder(nil).joins(:favourites).includes(:account).merge(account.favourites).find_in_batches do |statuses| |     zipfile.get_output_stream('likes.json') do |io| | ||||||
|       statuses.each do |status| |       io.write(prepend) | ||||||
|         collection[:totalItems] += 1 | 
 | ||||||
|         collection[:orderedItems] << ActivityPub::TagManager.instance.uri_for(status) |       add_comma = false | ||||||
|  | 
 | ||||||
|  |       Status.reorder(nil).joins(:favourites).includes(:account).merge(account.favourites).find_in_batches do |statuses| | ||||||
|  |         io.write(',') if add_comma | ||||||
|  |         add_comma = true | ||||||
|  | 
 | ||||||
|  |         io.write(statuses.map do |status| | ||||||
|  |           Oj.dump(ActivityPub::TagManager.instance.uri_for(status)) | ||||||
|  |         end.join(',')) | ||||||
|  | 
 | ||||||
|  |         GC.start | ||||||
|       end |       end | ||||||
| 
 | 
 | ||||||
|       GC.start |       io.write(append) | ||||||
|     end |  | ||||||
| 
 |  | ||||||
|     json = Oj.dump(collection) |  | ||||||
| 
 |  | ||||||
|     tar.add_file_simple('likes.json', 0o444, json.bytesize) do |io| |  | ||||||
|       io.write(json) |  | ||||||
|     end |     end | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   def dump_bookmarks!(tar) |   def dump_bookmarks!(zipfile) | ||||||
|     collection = serialize(ActivityPub::CollectionPresenter.new(id: 'bookmarks.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer) |     skeleton = serialize(ActivityPub::CollectionPresenter.new(id: 'bookmarks.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer) | ||||||
|  |     skeleton.delete(:totalItems) | ||||||
|  |     skeleton[:orderedItems] = ['!PLACEHOLDER!'] | ||||||
|  |     skeleton = Oj.dump(skeleton) | ||||||
|  |     prepend, append = skeleton.split('"!PLACEHOLDER!"') | ||||||
| 
 | 
 | ||||||
|     Status.reorder(nil).joins(:bookmarks).includes(:account).merge(account.bookmarks).find_in_batches do |statuses| |     zipfile.get_output_stream('bookmarks.json') do |io| | ||||||
|       statuses.each do |status| |       io.write(prepend) | ||||||
|         collection[:totalItems] += 1 | 
 | ||||||
|         collection[:orderedItems] << ActivityPub::TagManager.instance.uri_for(status) |       add_comma = false | ||||||
|  |       Status.reorder(nil).joins(:bookmarks).includes(:account).merge(account.bookmarks).find_in_batches do |statuses| | ||||||
|  |         io.write(',') if add_comma | ||||||
|  |         add_comma = true | ||||||
|  | 
 | ||||||
|  |         io.write(statuses.map do |status| | ||||||
|  |           Oj.dump(ActivityPub::TagManager.instance.uri_for(status)) | ||||||
|  |         end.join(',')) | ||||||
|  | 
 | ||||||
|  |         GC.start | ||||||
|       end |       end | ||||||
| 
 | 
 | ||||||
|       GC.start |       io.write(append) | ||||||
|     end |  | ||||||
| 
 |  | ||||||
|     json = Oj.dump(collection) |  | ||||||
| 
 |  | ||||||
|     tar.add_file_simple('bookmarks.json', 0o444, json.bytesize) do |io| |  | ||||||
|       io.write(json) |  | ||||||
|     end |     end | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|  | @ -159,10 +183,10 @@ class BackupService < BaseService | ||||||
| 
 | 
 | ||||||
|   CHUNK_SIZE = 1.megabyte |   CHUNK_SIZE = 1.megabyte | ||||||
| 
 | 
 | ||||||
|   def download_to_tar(tar, attachment, filename) |   def download_to_zip(zipfile, attachment, filename) | ||||||
|     adapter = Paperclip.io_adapters.for(attachment) |     adapter = Paperclip.io_adapters.for(attachment) | ||||||
| 
 | 
 | ||||||
|     tar.add_file_simple(filename, 0o444, adapter.size) do |io| |     zipfile.get_output_stream(filename) do |io| | ||||||
|       while (buffer = adapter.read(CHUNK_SIZE)) |       while (buffer = adapter.read(CHUNK_SIZE)) | ||||||
|         io.write(buffer) |         io.write(buffer) | ||||||
|       end |       end | ||||||
|  |  | ||||||
							
								
								
									
										67
									
								
								spec/services/backup_service_spec.rb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								spec/services/backup_service_spec.rb
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,67 @@ | ||||||
|  | # frozen_string_literal: true | ||||||
|  | 
 | ||||||
|  | require 'rails_helper' | ||||||
|  | 
 | ||||||
|  | RSpec.describe BackupService, type: :service do | ||||||
|  |   subject(:service_call) { described_class.new.call(backup) } | ||||||
|  | 
 | ||||||
|  |   let!(:user)           { Fabricate(:user) } | ||||||
|  |   let!(:attachment)     { Fabricate(:media_attachment, account: user.account) } | ||||||
|  |   let!(:status)         { Fabricate(:status, account: user.account, text: 'Hello', visibility: :public, media_attachments: [attachment]) } | ||||||
|  |   let!(:private_status) { Fabricate(:status, account: user.account, text: 'secret', visibility: :private) } | ||||||
|  |   let!(:favourite)      { Fabricate(:favourite, account: user.account) } | ||||||
|  |   let!(:bookmark)       { Fabricate(:bookmark, account: user.account) } | ||||||
|  |   let!(:backup)         { Fabricate(:backup, user: user) } | ||||||
|  | 
 | ||||||
|  |   def read_zip_file(backup, filename) | ||||||
|  |     file = Paperclip.io_adapters.for(backup.dump) | ||||||
|  |     Zip::File.open(file) do |zipfile| | ||||||
|  |       entry = zipfile.glob(filename).first | ||||||
|  |       return entry.get_input_stream.read | ||||||
|  |     end | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|  |   it 'marks the backup as processed' do | ||||||
|  |     expect { service_call }.to change(backup, :processed).from(false).to(true) | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|  |   it 'exports outbox.json as expected' do | ||||||
|  |     service_call | ||||||
|  | 
 | ||||||
|  |     json = Oj.load(read_zip_file(backup, 'outbox.json')) | ||||||
|  |     expect(json['@context']).to_not be_nil | ||||||
|  |     expect(json['type']).to eq 'OrderedCollection' | ||||||
|  |     expect(json['totalItems']).to eq 2 | ||||||
|  |     expect(json['orderedItems'][0]['@context']).to be_nil | ||||||
|  |     expect(json['orderedItems'][0]).to include({ | ||||||
|  |       'type' => 'Create', | ||||||
|  |       'object' => include({ | ||||||
|  |         'id' => ActivityPub::TagManager.instance.uri_for(status), | ||||||
|  |         'content' => '<p>Hello</p>', | ||||||
|  |       }), | ||||||
|  |     }) | ||||||
|  |     expect(json['orderedItems'][1]).to include({ | ||||||
|  |       'type' => 'Create', | ||||||
|  |       'object' => include({ | ||||||
|  |         'id' => ActivityPub::TagManager.instance.uri_for(private_status), | ||||||
|  |         'content' => '<p>secret</p>', | ||||||
|  |       }), | ||||||
|  |     }) | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|  |   it 'exports likes.json as expected' do | ||||||
|  |     service_call | ||||||
|  | 
 | ||||||
|  |     json = Oj.load(read_zip_file(backup, 'likes.json')) | ||||||
|  |     expect(json['type']).to eq 'OrderedCollection' | ||||||
|  |     expect(json['orderedItems']).to eq [ActivityPub::TagManager.instance.uri_for(favourite.status)] | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|  |   it 'exports bookmarks.json as expected' do | ||||||
|  |     service_call | ||||||
|  | 
 | ||||||
|  |     json = Oj.load(read_zip_file(backup, 'bookmarks.json')) | ||||||
|  |     expect(json['type']).to eq 'OrderedCollection' | ||||||
|  |     expect(json['orderedItems']).to eq [ActivityPub::TagManager.instance.uri_for(bookmark.status)] | ||||||
|  |   end | ||||||
|  | end | ||||||
		Loading…
	
		Reference in a new issue