From 0297acfe70983ed4981205e2cd5747a7127b69a7 Mon Sep 17 00:00:00 2001 From: Claire Date: Thu, 17 Nov 2022 11:42:20 +0100 Subject: [PATCH] Fix various issues with domain block import (#1944) - stop using Paperclip for processing domain allow/block imports - stop leaving temporary files - better error handling - assume CSV files are UTF-8-encoded --- .../admin/export_domain_allows_controller.rb | 6 ++-- .../admin/export_domain_blocks_controller.rb | 6 ++-- .../admin_export_controller_concern.rb | 8 ++--- app/models/admin/import.rb | 35 ++++++++++--------- app/validators/admin_import_validator.rb | 19 ---------- config/locales/en.yml | 1 + 6 files changed, 30 insertions(+), 45 deletions(-) delete mode 100644 app/validators/admin_import_validator.rb diff --git a/app/controllers/admin/export_domain_allows_controller.rb b/app/controllers/admin/export_domain_allows_controller.rb index eb2955ac38..57fb12c620 100644 --- a/app/controllers/admin/export_domain_allows_controller.rb +++ b/app/controllers/admin/export_domain_allows_controller.rb @@ -8,8 +8,6 @@ module Admin before_action :set_dummy_import!, only: [:new] - ROWS_PROCESSING_LIMIT = 20_000 - def new authorize :domain_allow, :create? end @@ -23,9 +21,11 @@ module Admin authorize :domain_allow, :create? begin @import = Admin::Import.new(import_params) + return render :new unless @import.validate + parse_import_data!(export_headers) - @data.take(ROWS_PROCESSING_LIMIT).each do |row| + @data.take(Admin::Import::ROWS_PROCESSING_LIMIT).each do |row| domain = row['#domain'].strip next if DomainAllow.allowed?(domain) diff --git a/app/controllers/admin/export_domain_blocks_controller.rb b/app/controllers/admin/export_domain_blocks_controller.rb index 545bd94edd..fb0cd05d29 100644 --- a/app/controllers/admin/export_domain_blocks_controller.rb +++ b/app/controllers/admin/export_domain_blocks_controller.rb @@ -8,8 +8,6 @@ module Admin before_action :set_dummy_import!, only: [:new] - ROWS_PROCESSING_LIMIT = 20_000 - def new authorize :domain_block, :create? end @@ -23,12 +21,14 @@ module Admin authorize :domain_block, :create? @import = Admin::Import.new(import_params) + return render :new unless @import.validate + parse_import_data!(export_headers) @global_private_comment = I18n.t('admin.export_domain_blocks.import.private_comment_template', source: @import.data_file_name, date: I18n.l(Time.now.utc)) @form = Form::DomainBlockBatch.new - @domain_blocks = @data.take(ROWS_PROCESSING_LIMIT).filter_map do |row| + @domain_blocks = @data.take(Admin::Import::ROWS_PROCESSING_LIMIT).filter_map do |row| domain = row['#domain'].strip next if DomainBlock.rule_for(domain).present? diff --git a/app/controllers/concerns/admin_export_controller_concern.rb b/app/controllers/concerns/admin_export_controller_concern.rb index 013915d025..b40c76557f 100644 --- a/app/controllers/concerns/admin_export_controller_concern.rb +++ b/app/controllers/concerns/admin_export_controller_concern.rb @@ -27,13 +27,13 @@ module AdminExportControllerConcern params.require(:admin_import).permit(:data) end - def import_data - Paperclip.io_adapters.for(@import.data).read + def import_data_path + params[:admin_import][:data].path end def parse_import_data!(default_headers) - data = CSV.parse(import_data, headers: true) - data = CSV.parse(import_data, headers: default_headers) unless data.headers&.first&.strip&.include?(default_headers[0]) + data = CSV.read(import_data_path, headers: true, encoding: 'UTF-8') + data = CSV.read(import_data_path, headers: default_headers, encoding: 'UTF-8') unless data.headers&.first&.strip&.include?(default_headers[0]) @data = data.reject(&:blank?) end end diff --git a/app/models/admin/import.rb b/app/models/admin/import.rb index c305be237a..79c0722d53 100644 --- a/app/models/admin/import.rb +++ b/app/models/admin/import.rb @@ -2,28 +2,31 @@ # A non-activerecord helper class for csv upload class Admin::Import - extend ActiveModel::Callbacks include ActiveModel::Model - include Paperclip::Glue - FILE_TYPES = %w(text/plain text/csv application/csv).freeze + ROWS_PROCESSING_LIMIT = 20_000 - # Paperclip required callbacks - define_model_callbacks :save, only: [:after] - define_model_callbacks :destroy, only: [:before, :after] + attr_accessor :data - attr_accessor :data_file_name, :data_content_type + validates :data, presence: true + validate :validate_data - has_attached_file :data - validates_attachment_content_type :data, content_type: FILE_TYPES - validates_attachment_presence :data - validates_with AdminImportValidator, on: :create - - def save - run_callbacks :save + def data_file_name + data.original_filename end - def destroy - run_callbacks :destroy + private + + def validate_data + return if data.blank? + + csv_data = CSV.read(data.path, encoding: 'UTF-8') + + row_count = csv_data.size + row_count -= 1 if csv_data.first&.first == '#domain' + + errors.add(:data, I18n.t('imports.errors.over_rows_processing_limit', count: ROWS_PROCESSING_LIMIT)) if row_count > ROWS_PROCESSING_LIMIT + rescue CSV::MalformedCSVError => e + errors.add(:data, I18n.t('imports.errors.invalid_csv_file', error: e.message)) end end diff --git a/app/validators/admin_import_validator.rb b/app/validators/admin_import_validator.rb deleted file mode 100644 index 338ceb3a78..0000000000 --- a/app/validators/admin_import_validator.rb +++ /dev/null @@ -1,19 +0,0 @@ -# frozen_string_literal: true - -class AdminImportValidator < ActiveModel::Validator - FIRST_HEADER = '#domain' - - def validate(import) - return if import.type.blank? || import.data.blank? - - # We parse because newlines could be part of individual rows. This - # runs on create so we should be reading the local file here before - # it is uploaded to object storage or moved anywhere... - csv_data = CSV.parse(import.data.queued_for_write[:original].read) - - row_count = csv_data.size - row_count -= 1 if csv_data.first&.first == FIRST_HEADER - - import.errors.add(:data, I18n.t('imports.errors.over_rows_processing_limit', count: Admin::DomainBlocksController::ROWS_PROCESSING_LIMIT)) if row_count > Admin::DomainBlocksController::ROWS_PROCESSING_LIMIT - end -end diff --git a/config/locales/en.yml b/config/locales/en.yml index 679e356b41..2f766cef38 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -1159,6 +1159,7 @@ en: invalid_markup: 'contains invalid HTML markup: %{error}' imports: errors: + invalid_csv_file: 'Invalid CSV file. Error: %{error}' over_rows_processing_limit: contains more than %{count} rows modes: merge: Merge