Change importers to avoid a few inefficiencies (#26721)
This commit is contained in:
		
							parent
							
								
									703178c117
								
							
						
					
					
						commit
						dcd8d25ac1
					
				
					 6 changed files with 34 additions and 33 deletions
				
			
		|  | @ -4,10 +4,10 @@ class Importer::AccountsIndexImporter < Importer::BaseImporter | ||||||
|   def import! |   def import! | ||||||
|     scope.includes(:account_stat).find_in_batches(batch_size: @batch_size) do |tmp| |     scope.includes(:account_stat).find_in_batches(batch_size: @batch_size) do |tmp| | ||||||
|       in_work_unit(tmp) do |accounts| |       in_work_unit(tmp) do |accounts| | ||||||
|         bulk = Chewy::Index::Import::BulkBuilder.new(index, to_index: accounts).bulk_body |         bulk = build_bulk_body(accounts) | ||||||
| 
 | 
 | ||||||
|         indexed = bulk.count { |entry| entry[:index] } |         indexed = bulk.size | ||||||
|         deleted = bulk.count { |entry| entry[:delete] } |         deleted = 0 | ||||||
| 
 | 
 | ||||||
|         Chewy::Index::Import::BulkRequest.new(index).perform(bulk) |         Chewy::Index::Import::BulkRequest.new(index).perform(bulk) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -68,6 +68,14 @@ class Importer::BaseImporter | ||||||
| 
 | 
 | ||||||
|   protected |   protected | ||||||
| 
 | 
 | ||||||
|  |   def build_bulk_body(to_import) | ||||||
|  |     # Specialize `Chewy::Index::Import::BulkBuilder#bulk_body` to avoid a few | ||||||
|  |     # inefficiencies, as none of our fields are join fields and we do not need | ||||||
|  |     # `BulkBuilder`'s versatility. | ||||||
|  |     crutches = Chewy::Index::Crutch::Crutches.new index, to_import | ||||||
|  |     to_import.map { |object| { index: { _id: object.id, data: index.compose(object, crutches, fields: []) } } } | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|   def in_work_unit(...) |   def in_work_unit(...) | ||||||
|     work_unit = Concurrent::Promises.future_on(@executor, ...) |     work_unit = Concurrent::Promises.future_on(@executor, ...) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -4,10 +4,10 @@ class Importer::InstancesIndexImporter < Importer::BaseImporter | ||||||
|   def import! |   def import! | ||||||
|     index.adapter.default_scope.find_in_batches(batch_size: @batch_size) do |tmp| |     index.adapter.default_scope.find_in_batches(batch_size: @batch_size) do |tmp| | ||||||
|       in_work_unit(tmp) do |instances| |       in_work_unit(tmp) do |instances| | ||||||
|         bulk = Chewy::Index::Import::BulkBuilder.new(index, to_index: instances).bulk_body |         bulk = build_bulk_body(instances) | ||||||
| 
 | 
 | ||||||
|         indexed = bulk.count { |entry| entry[:index] } |         indexed = bulk.size | ||||||
|         deleted = bulk.count { |entry| entry[:delete] } |         deleted = 0 | ||||||
| 
 | 
 | ||||||
|         Chewy::Index::Import::BulkRequest.new(index).perform(bulk) |         Chewy::Index::Import::BulkRequest.new(index).perform(bulk) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -5,11 +5,11 @@ class Importer::PublicStatusesIndexImporter < Importer::BaseImporter | ||||||
|     scope.select(:id).find_in_batches(batch_size: @batch_size) do |batch| |     scope.select(:id).find_in_batches(batch_size: @batch_size) do |batch| | ||||||
|       in_work_unit(batch.pluck(:id)) do |status_ids| |       in_work_unit(batch.pluck(:id)) do |status_ids| | ||||||
|         bulk = ActiveRecord::Base.connection_pool.with_connection do |         bulk = ActiveRecord::Base.connection_pool.with_connection do | ||||||
|           Chewy::Index::Import::BulkBuilder.new(index, to_index: Status.includes(:media_attachments, :preloadable_poll, :preview_cards).where(id: status_ids)).bulk_body |           build_bulk_body(index.adapter.default_scope.where(id: status_ids)) | ||||||
|         end |         end | ||||||
| 
 | 
 | ||||||
|         indexed = bulk.count { |entry| entry[:index] } |         indexed = bulk.size | ||||||
|         deleted = bulk.count { |entry| entry[:delete] } |         deleted = 0 | ||||||
| 
 | 
 | ||||||
|         Chewy::Index::Import::BulkRequest.new(index).perform(bulk) |         Chewy::Index::Import::BulkRequest.new(index).perform(bulk) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -13,32 +13,25 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter | ||||||
| 
 | 
 | ||||||
|       scope.find_in_batches(batch_size: @batch_size) do |tmp| |       scope.find_in_batches(batch_size: @batch_size) do |tmp| | ||||||
|         in_work_unit(tmp.map(&:status_id)) do |status_ids| |         in_work_unit(tmp.map(&:status_id)) do |status_ids| | ||||||
|           bulk = ActiveRecord::Base.connection_pool.with_connection do |  | ||||||
|             Chewy::Index::Import::BulkBuilder.new(index, to_index: index.adapter.default_scope.where(id: status_ids)).bulk_body |  | ||||||
|           end |  | ||||||
| 
 |  | ||||||
|           indexed = 0 |  | ||||||
|           deleted = 0 |           deleted = 0 | ||||||
| 
 | 
 | ||||||
|           # We can't use the delete_if proc to do the filtering because delete_if |           bulk = ActiveRecord::Base.connection_pool.with_connection do | ||||||
|           # is called before rendering the data and we need to filter based |             to_index = index.adapter.default_scope.where(id: status_ids) | ||||||
|           # on the results of the filter, so this filtering happens here instead |             crutches = Chewy::Index::Crutch::Crutches.new index, to_index | ||||||
|           bulk.map! do |entry| |             to_index.map do |object| | ||||||
|             new_entry = if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank? |               # This is unlikely to happen, but the post may have been | ||||||
|                           { delete: entry[:index].except(:data) } |               # un-interacted with since it was queued for indexing | ||||||
|                         else |               if object.searchable_by.empty? | ||||||
|                           entry |                 deleted += 1 | ||||||
|                         end |                 { delete: { _id: object.id } } | ||||||
| 
 |               else | ||||||
|             if new_entry[:index] |                 { index: { _id: object.id, data: index.compose(object, crutches, fields: []) } } | ||||||
|               indexed += 1 |               end | ||||||
|             else |  | ||||||
|               deleted += 1 |  | ||||||
|             end |             end | ||||||
| 
 |  | ||||||
|             new_entry |  | ||||||
|           end |           end | ||||||
| 
 | 
 | ||||||
|  |           indexed = bulk.size - deleted | ||||||
|  | 
 | ||||||
|           Chewy::Index::Import::BulkRequest.new(index).perform(bulk) |           Chewy::Index::Import::BulkRequest.new(index).perform(bulk) | ||||||
| 
 | 
 | ||||||
|           [indexed, deleted] |           [indexed, deleted] | ||||||
|  |  | ||||||
|  | @ -4,10 +4,10 @@ class Importer::TagsIndexImporter < Importer::BaseImporter | ||||||
|   def import! |   def import! | ||||||
|     index.adapter.default_scope.find_in_batches(batch_size: @batch_size) do |tmp| |     index.adapter.default_scope.find_in_batches(batch_size: @batch_size) do |tmp| | ||||||
|       in_work_unit(tmp) do |tags| |       in_work_unit(tmp) do |tags| | ||||||
|         bulk = Chewy::Index::Import::BulkBuilder.new(index, to_index: tags).bulk_body |         bulk = build_bulk_body(tags) | ||||||
| 
 | 
 | ||||||
|         indexed = bulk.count { |entry| entry[:index] } |         indexed = bulk.size | ||||||
|         deleted = bulk.count { |entry| entry[:delete] } |         deleted = 0 | ||||||
| 
 | 
 | ||||||
|         Chewy::Index::Import::BulkRequest.new(index).perform(bulk) |         Chewy::Index::Import::BulkRequest.new(index).perform(bulk) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue