From b8386076c5f2c592d3645a600f60aef0717d8c7b Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Sun, 28 Jul 2019 05:59:51 +0200 Subject: [PATCH] Change hashtags to preserve first-used casing (#11416) --- app/lib/activitypub/activity/create.rb | 9 ++--- app/models/tag.rb | 36 ++++++++++++++++--- app/services/hashtag_query_service.rb | 4 +-- app/services/process_hashtags_service.rb | 4 +-- ...5042_add_case_insensitive_index_to_tags.rb | 15 ++++++++ db/schema.rb | 5 ++- 6 files changed, 54 insertions(+), 19 deletions(-) create mode 100644 db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index 56c24680a7..000b77df5f 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -148,12 +148,9 @@ class ActivityPub::Activity::Create < ActivityPub::Activity def process_hashtag(tag) return if tag['name'].blank? - hashtag = tag['name'].gsub(/\A#/, '').mb_chars.downcase - hashtag = Tag.where(name: hashtag).first_or_create!(name: hashtag) - - return if @tags.include?(hashtag) - - @tags << hashtag + Tag.find_or_create_by_names(tag['name']) do |hashtag| + @tags << hashtag unless @tags.include?(hashtag) + end rescue ActiveRecord::RecordInvalid nil end diff --git a/app/models/tag.rb b/app/models/tag.rb index b371d59c1b..972242064b 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -20,7 +20,7 @@ class Tag < ApplicationRecord HASHTAG_NAME_RE = '([[:word:]_][[:word:]_·]*[[:alpha:]_·][[:word:]_·]*[[:word:]_])|([[:word:]_]*[[:alpha:]][[:word:]_]*)' HASHTAG_RE = /(?:^|[^\/\)\w])#(#{HASHTAG_NAME_RE})/i - validates :name, presence: true, uniqueness: true, format: { with: /\A(#{HASHTAG_NAME_RE})\z/i } + validates :name, presence: true, format: { with: /\A(#{HASHTAG_NAME_RE})\z/i } scope :discoverable, -> { joins(:account_tag_stat).where(AccountTagStat.arel_table[:accounts_count].gt(0)).where(account_tag_stats: { hidden: false }).order(Arel.sql('account_tag_stats.accounts_count desc')) } scope :hidden, -> { where(account_tag_stats: { hidden: true }) } @@ -64,22 +64,48 @@ class Tag < ApplicationRecord end class << self - def search_for(term, limit = 5, offset = 0) - pattern = sanitize_sql_like(term.strip) + '%' + def find_or_create_by_names(name_or_names) + Array(name_or_names).map(&method(:normalize)).uniq.map do |normalized_name| + tag = matching_name(normalized_name).first || create(name: normalized_name) - Tag.where('lower(name) like lower(?)', pattern) + yield tag if block_given? + + tag + end + end + + def search_for(term, limit = 5, offset = 0) + pattern = sanitize_sql_like(normalize(term.strip)) + '%' + + Tag.where(arel_table[:name].lower.matches(pattern.downcase)) .order(:name) .limit(limit) .offset(offset) end def find_normalized(name) - find_by(name: name.mb_chars.downcase.to_s) + matching_name(name).first end def find_normalized!(name) find_normalized(name) || raise(ActiveRecord::RecordNotFound) end + + def matching_name(name_or_names) + names = Array(name_or_names).map { |name| normalize(name).downcase } + + if names.size == 1 + where(arel_table[:name].lower.eq(names.first)) + else + where(arel_table[:name].lower.in(names)) + end + end + + private + + def normalize(str) + str.gsub(/\A#/, '').mb_chars.to_s + end end private diff --git a/app/services/hashtag_query_service.rb b/app/services/hashtag_query_service.rb index 5773d78c6e..282821710a 100644 --- a/app/services/hashtag_query_service.rb +++ b/app/services/hashtag_query_service.rb @@ -14,7 +14,7 @@ class HashtagQueryService < BaseService private - def tags_for(tags) - Tag.where(name: tags.map(&:downcase)) if tags.presence + def tags_for(names) + Tag.matching_name(names) if names.presence end end diff --git a/app/services/process_hashtags_service.rb b/app/services/process_hashtags_service.rb index b6974e598b..e8e139b058 100644 --- a/app/services/process_hashtags_service.rb +++ b/app/services/process_hashtags_service.rb @@ -5,9 +5,7 @@ class ProcessHashtagsService < BaseService tags = Extractor.extract_hashtags(status.text) if status.local? records = [] - tags.map { |str| str.mb_chars.downcase }.uniq(&:to_s).each do |name| - tag = Tag.where(name: name).first_or_create(name: name) - + Tag.find_or_create_by_names(tags) do |tag| status.tags << tag records << tag diff --git a/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb b/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb new file mode 100644 index 0000000000..6fa8c0ec43 --- /dev/null +++ b/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb @@ -0,0 +1,15 @@ +class AddCaseInsensitiveIndexToTags < ActiveRecord::Migration[5.2] + disable_ddl_transaction! + + def up + safety_assured { execute 'CREATE UNIQUE INDEX CONCURRENTLY index_tags_on_name_lower ON tags (lower(name))' } + remove_index :tags, name: 'index_tags_on_name' + remove_index :tags, name: 'hashtag_search_index' + end + + def down + add_index :tags, :name, unique: true, algorithm: :concurrently + safety_assured { execute 'CREATE INDEX CONCURRENTLY hashtag_search_index ON tags (name text_pattern_ops)' } + remove_index :tags, name: 'index_tags_on_name_lower' + end +end diff --git a/db/schema.rb b/db/schema.rb index 6319dd932d..1847305c7f 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_07_15_164535) do +ActiveRecord::Schema.define(version: 2019_07_26_175042) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -652,8 +652,7 @@ ActiveRecord::Schema.define(version: 2019_07_15_164535) do t.string "name", default: "", null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.index "lower((name)::text) text_pattern_ops", name: "hashtag_search_index" - t.index ["name"], name: "index_tags_on_name", unique: true + t.index "lower((name)::text)", name: "index_tags_on_name_lower", unique: true end create_table "tombstones", force: :cascade do |t|