From fcc4c9b34a6ab771c9cef6673e817866773e12d0 Mon Sep 17 00:00:00 2001 From: Claire Date: Wed, 18 Jan 2023 16:20:52 +0100 Subject: [PATCH] Change domain block CSV parsing to be more robust and handle more lists (#21470) * Change domain block CSV parsing to be more robust and handle more lists * Add some tests * Improve domain block import validation and reporting --- .../admin/export_domain_allows_controller.rb | 4 +- .../admin/export_domain_blocks_controller.rb | 24 +++++++---- .../admin_export_controller_concern.rb | 10 ----- app/models/admin/import.rb | 43 ++++++++++++++++--- config/locales/en.yml | 1 + .../export_domain_blocks_controller_spec.rb | 34 ++++++++++++--- spec/fixtures/files/domain_blocks.csv | 6 +-- spec/fixtures/files/domain_blocks_list.txt | 3 ++ 8 files changed, 88 insertions(+), 37 deletions(-) create mode 100644 spec/fixtures/files/domain_blocks_list.txt diff --git a/app/controllers/admin/export_domain_allows_controller.rb b/app/controllers/admin/export_domain_allows_controller.rb index 57fb12c620..adfc39da21 100644 --- a/app/controllers/admin/export_domain_allows_controller.rb +++ b/app/controllers/admin/export_domain_allows_controller.rb @@ -23,9 +23,7 @@ module Admin @import = Admin::Import.new(import_params) return render :new unless @import.validate - parse_import_data!(export_headers) - - @data.take(Admin::Import::ROWS_PROCESSING_LIMIT).each do |row| + @import.csv_rows.each do |row| domain = row['#domain'].strip next if DomainAllow.allowed?(domain) diff --git a/app/controllers/admin/export_domain_blocks_controller.rb b/app/controllers/admin/export_domain_blocks_controller.rb index fb0cd05d29..816422d4ff 100644 --- a/app/controllers/admin/export_domain_blocks_controller.rb +++ b/app/controllers/admin/export_domain_blocks_controller.rb @@ -23,24 +23,30 @@ module Admin @import = Admin::Import.new(import_params) return render :new unless @import.validate - parse_import_data!(export_headers) - @global_private_comment = I18n.t('admin.export_domain_blocks.import.private_comment_template', source: @import.data_file_name, date: I18n.l(Time.now.utc)) @form = Form::DomainBlockBatch.new - @domain_blocks = @data.take(Admin::Import::ROWS_PROCESSING_LIMIT).filter_map do |row| + @domain_blocks = @import.csv_rows.filter_map do |row| domain = row['#domain'].strip next if DomainBlock.rule_for(domain).present? domain_block = DomainBlock.new(domain: domain, - severity: row['#severity'].strip, - reject_media: row['#reject_media'].strip, - reject_reports: row['#reject_reports'].strip, + severity: row.fetch('#severity', :suspend), + reject_media: row.fetch('#reject_media', false), + reject_reports: row.fetch('#reject_reports', false), private_comment: @global_private_comment, - public_comment: row['#public_comment']&.strip, - obfuscate: row['#obfuscate'].strip) + public_comment: row['#public_comment'], + obfuscate: row.fetch('#obfuscate', false)) + + if domain_block.invalid? + flash.now[:alert] = I18n.t('admin.export_domain_blocks.invalid_domain_block', error: domain_block.errors.full_messages.join(', ')) + next + end - domain_block if domain_block.valid? + domain_block + rescue ArgumentError => e + flash.now[:alert] = I18n.t('admin.export_domain_blocks.invalid_domain_block', error: e.message) + next end @warning_domains = Instance.where(domain: @domain_blocks.map(&:domain)).where('EXISTS (SELECT 1 FROM follows JOIN accounts ON follows.account_id = accounts.id OR follows.target_account_id = accounts.id WHERE accounts.domain = instances.domain)').pluck(:domain) diff --git a/app/controllers/concerns/admin_export_controller_concern.rb b/app/controllers/concerns/admin_export_controller_concern.rb index b40c76557f..4ac48a04b7 100644 --- a/app/controllers/concerns/admin_export_controller_concern.rb +++ b/app/controllers/concerns/admin_export_controller_concern.rb @@ -26,14 +26,4 @@ module AdminExportControllerConcern def import_params params.require(:admin_import).permit(:data) end - - def import_data_path - params[:admin_import][:data].path - end - - def parse_import_data!(default_headers) - data = CSV.read(import_data_path, headers: true, encoding: 'UTF-8') - data = CSV.read(import_data_path, headers: default_headers, encoding: 'UTF-8') unless data.headers&.first&.strip&.include?(default_headers[0]) - @data = data.reject(&:blank?) - end end diff --git a/app/models/admin/import.rb b/app/models/admin/import.rb index 79c0722d53..fecde4878b 100644 --- a/app/models/admin/import.rb +++ b/app/models/admin/import.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'csv' + # A non-activerecord helper class for csv upload class Admin::Import include ActiveModel::Model @@ -15,17 +17,46 @@ class Admin::Import data.original_filename end + def csv_rows + csv_data.rewind + + csv_data.take(ROWS_PROCESSING_LIMIT + 1) + end + private - def validate_data - return if data.blank? + def csv_data + return @csv_data if defined?(@csv_data) + + csv_converter = lambda do |field, field_info| + case field_info.header + when '#domain', '#public_comment' + field&.strip + when '#severity' + field&.strip&.to_sym + when '#reject_media', '#reject_reports', '#obfuscate' + ActiveModel::Type::Boolean.new.cast(field) + else + field + end + end + + @csv_data = CSV.open(data.path, encoding: 'UTF-8', skip_blanks: true, headers: true, converters: csv_converter) + @csv_data.take(1) # Ensure the headers are read + @csv_data = CSV.open(data.path, encoding: 'UTF-8', skip_blanks: true, headers: ['#domain'], converters: csv_converter) unless @csv_data.headers&.first == '#domain' + @csv_data + end - csv_data = CSV.read(data.path, encoding: 'UTF-8') + def csv_row_count + return @csv_row_count if defined?(@csv_row_count) - row_count = csv_data.size - row_count -= 1 if csv_data.first&.first == '#domain' + csv_data.rewind + @csv_row_count = csv_data.take(ROWS_PROCESSING_LIMIT + 2).count + end - errors.add(:data, I18n.t('imports.errors.over_rows_processing_limit', count: ROWS_PROCESSING_LIMIT)) if row_count > ROWS_PROCESSING_LIMIT + def validate_data + return if data.nil? + errors.add(:data, I18n.t('imports.errors.over_rows_processing_limit', count: ROWS_PROCESSING_LIMIT)) if csv_row_count > ROWS_PROCESSING_LIMIT rescue CSV::MalformedCSVError => e errors.add(:data, I18n.t('imports.errors.invalid_csv_file', error: e.message)) end diff --git a/config/locales/en.yml b/config/locales/en.yml index 763110c770..4143aab045 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -441,6 +441,7 @@ en: private_comment_description_html: 'To help you track where imported blocks come from, imported blocks will be created with the following private comment: %{comment}' private_comment_template: Imported from %{source} on %{date} title: Import domain blocks + invalid_domain_block: 'One or more domain blocks were skipped because of the following error(s): %{error}' new: title: Import domain blocks no_file: No file selected diff --git a/spec/controllers/admin/export_domain_blocks_controller_spec.rb b/spec/controllers/admin/export_domain_blocks_controller_spec.rb index 8697e0c215..2766102c89 100644 --- a/spec/controllers/admin/export_domain_blocks_controller_spec.rb +++ b/spec/controllers/admin/export_domain_blocks_controller_spec.rb @@ -9,9 +9,9 @@ RSpec.describe Admin::ExportDomainBlocksController, type: :controller do describe 'GET #export' do it 'renders instances' do - Fabricate(:domain_block, domain: 'bad.domain', severity: 'silence', public_comment: 'bad') - Fabricate(:domain_block, domain: 'worse.domain', severity: 'suspend', reject_media: true, reject_reports: true, public_comment: 'worse', obfuscate: true) - Fabricate(:domain_block, domain: 'reject.media', severity: 'noop', reject_media: true, public_comment: 'reject media') + Fabricate(:domain_block, domain: 'bad.domain', severity: 'silence', public_comment: 'bad server') + Fabricate(:domain_block, domain: 'worse.domain', severity: 'suspend', reject_media: true, reject_reports: true, public_comment: 'worse server', obfuscate: true) + Fabricate(:domain_block, domain: 'reject.media', severity: 'noop', reject_media: true, public_comment: 'reject media and test unicode characters ♥') Fabricate(:domain_block, domain: 'no.op', severity: 'noop', public_comment: 'noop') get :export, params: { format: :csv } @@ -21,10 +21,32 @@ RSpec.describe Admin::ExportDomainBlocksController, type: :controller do end describe 'POST #import' do - it 'blocks imported domains' do - post :import, params: { admin_import: { data: fixture_file_upload('domain_blocks.csv') } } + context 'with complete domain blocks CSV' do + before do + post :import, params: { admin_import: { data: fixture_file_upload('domain_blocks.csv') } } + end - expect(assigns(:domain_blocks).map(&:domain)).to match_array ['bad.domain', 'worse.domain', 'reject.media'] + it 'renders page with expected domain blocks' do + expect(assigns(:domain_blocks).map { |block| [block.domain, block.severity.to_sym] }).to match_array [['bad.domain', :silence], ['worse.domain', :suspend], ['reject.media', :noop]] + end + + it 'returns http success' do + expect(response).to have_http_status(200) + end + end + + context 'with a list of only domains' do + before do + post :import, params: { admin_import: { data: fixture_file_upload('domain_blocks_list.txt') } } + end + + it 'renders page with expected domain blocks' do + expect(assigns(:domain_blocks).map { |block| [block.domain, block.severity.to_sym] }).to match_array [['bad.domain', :suspend], ['worse.domain', :suspend], ['reject.media', :suspend]] + end + + it 'returns http success' do + expect(response).to have_http_status(200) + end end end diff --git a/spec/fixtures/files/domain_blocks.csv b/spec/fixtures/files/domain_blocks.csv index 28ffb91751..9dbfb4eaf7 100644 --- a/spec/fixtures/files/domain_blocks.csv +++ b/spec/fixtures/files/domain_blocks.csv @@ -1,4 +1,4 @@ #domain,#severity,#reject_media,#reject_reports,#public_comment,#obfuscate -bad.domain,silence,false,false,bad,false -worse.domain,suspend,true,true,worse,true -reject.media,noop,true,false,reject media,false +bad.domain,silence,false,false,bad server,false +worse.domain,suspend,true,true,worse server,true +reject.media,noop,true,false,reject media and test unicode characters ♥,false diff --git a/spec/fixtures/files/domain_blocks_list.txt b/spec/fixtures/files/domain_blocks_list.txt new file mode 100644 index 0000000000..7b6b242533 --- /dev/null +++ b/spec/fixtures/files/domain_blocks_list.txt @@ -0,0 +1,3 @@ +bad.domain +worse.domain +reject.media