Add language detection (#1772)

* Add language detection via WhatLanguage and (de)serialization of it through Atom

* Fix default language in ProcessFeedService

* Re-add newline before 'react-rails' Gem to fix groupings

Fixes Code Climate issue
main
Eugen 8 years ago committed by GitHub
parent 6d70a80263
commit e4af4898de

@ -56,6 +56,7 @@ gem 'sprockets-rails', :require => 'sprockets/railtie'
gem 'statsd-instrument' gem 'statsd-instrument'
gem 'twitter-text' gem 'twitter-text'
gem 'tzinfo-data' gem 'tzinfo-data'
gem 'whatlanguage'
gem 'react-rails' gem 'react-rails'
gem 'browserify-rails' gem 'browserify-rails'

@ -455,6 +455,7 @@ GEM
websocket-driver (0.6.5) websocket-driver (0.6.5)
websocket-extensions (>= 0.1.0) websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.2) websocket-extensions (0.1.2)
whatlanguage (1.0.6)
xpath (2.0.0) xpath (2.0.0)
nokogiri (~> 1.3) nokogiri (~> 1.3)
@ -541,6 +542,7 @@ DEPENDENCIES
tzinfo-data tzinfo-data
uglifier (>= 1.3.0) uglifier (>= 1.3.0)
webmock webmock
whatlanguage
RUBY VERSION RUBY VERSION
ruby 2.4.1p111 ruby 2.4.1p111

@ -327,8 +327,8 @@ class AtomSerializer
end end
def serialize_status_attributes(entry, status) def serialize_status_attributes(entry, status)
append_element(entry, 'summary', status.spoiler_text) if status.spoiler_text? append_element(entry, 'summary', status.spoiler_text, 'xml:lang': status.language) if status.spoiler_text?
append_element(entry, 'content', Formatter.instance.format(status.proper).to_str, type: 'html') append_element(entry, 'content', Formatter.instance.format(status.proper).to_str, type: 'html', 'xml:lang': status.language)
status.mentions.each do |mentioned| status.mentions.each do |mentioned|
append_element(entry, 'link', nil, rel: :mentioned, 'ostatus:object-type': TagManager::TYPES[:person], href: TagManager.instance.uri_for(mentioned.account)) append_element(entry, 'link', nil, rel: :mentioned, 'ostatus:object-type': TagManager::TYPES[:person], href: TagManager.instance.uri_for(mentioned.account))

@ -19,6 +19,7 @@ class PostStatusService < BaseService
sensitive: options[:sensitive], sensitive: options[:sensitive],
spoiler_text: options[:spoiler_text] || '', spoiler_text: options[:spoiler_text] || '',
visibility: options[:visibility], visibility: options[:visibility],
language: detect_language(text),
application: options[:application]) application: options[:application])
attach_media(status, media) attach_media(status, media)
@ -51,6 +52,10 @@ class PostStatusService < BaseService
media.update(status_id: status.id) media.update(status_id: status.id)
end end
# Guesses the language of a status text so it can be stored on the status.
#
# Delegates to WhatLanguage#language_iso using the :all language set. If the
# detector yields no result (nil), falls back to 'en' -- the same default used
# by the statuses.language column and by ProcessFeedService#content_language --
# so that nil is never handed to the NOT NULL language column.
#
# @param text [String] the status text to inspect
# @return [Object] whatever WhatLanguage#language_iso returns (presumably an
#   ISO 639-1 code -- confirm against the gem), or the String 'en' as fallback
def detect_language(text)
  WhatLanguage.new(:all).language_iso(text) || 'en'
end
def process_mentions_service def process_mentions_service
@process_mentions_service ||= ProcessMentionsService.new @process_mentions_service ||= ProcessMentionsService.new
end end

@ -119,6 +119,7 @@ class ProcessFeedService < BaseService
spoiler_text: content_warning(entry), spoiler_text: content_warning(entry),
created_at: published(entry), created_at: published(entry),
reply: thread?(entry), reply: thread?(entry),
language: content_language(entry),
visibility: visibility_scope(entry) visibility: visibility_scope(entry)
) )
@ -238,6 +239,10 @@ class ProcessFeedService < BaseService
xml.at_xpath('./xmlns:content', xmlns: TagManager::XMLNS).content xml.at_xpath('./xmlns:content', xmlns: TagManager::XMLNS).content
end end
# Reads the language declared on an Atom entry's <content> element.
#
# Looks up the xml:lang attribute of ./xmlns:content (Atom namespace taken
# from TagManager::XMLNS). Falls back to 'en' when the attribute is absent or
# blank, and also when the entry has no <content> element at all -- the node
# lookup is nil-guarded so a missing element no longer raises NoMethodError.
#
# @param xml [#at_xpath] entry node to inspect; defaults to @xml
# @return [String] the declared language, or 'en' when none is available
def content_language(xml = @xml)
  content_node = xml.at_xpath('./xmlns:content', xmlns: TagManager::XMLNS)
  declared = content_node && content_node['xml:lang']
  declared&.presence || 'en'
end
def content_warning(xml = @xml) def content_warning(xml = @xml)
xml.at_xpath('./xmlns:summary', xmlns: TagManager::XMLNS)&.content || '' xml.at_xpath('./xmlns:summary', xmlns: TagManager::XMLNS)&.content || ''
end end

@ -0,0 +1,5 @@
# Adds a non-null string column `language` to the statuses table, with 'en'
# as the column default.
class AddLanguageToStatuses < ActiveRecord::Migration[5.0]
  # Reversible: only adds a column, so rolling back removes it again.
  def change
    change_table :statuses do |t|
      t.string :language, null: false, default: 'en'
    end
  end
end

@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20170414080609) do ActiveRecord::Schema.define(version: 20170414132105) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
@ -40,7 +40,6 @@ ActiveRecord::Schema.define(version: 20170414080609) do
t.datetime "header_updated_at" t.datetime "header_updated_at"
t.string "avatar_remote_url" t.string "avatar_remote_url"
t.datetime "subscription_expires_at" t.datetime "subscription_expires_at"
t.datetime "last_webfingered_at"
t.boolean "silenced", default: false, null: false t.boolean "silenced", default: false, null: false
t.boolean "suspended", default: false, null: false t.boolean "suspended", default: false, null: false
t.boolean "locked", default: false, null: false t.boolean "locked", default: false, null: false
@ -48,6 +47,7 @@ ActiveRecord::Schema.define(version: 20170414080609) do
t.integer "statuses_count", default: 0, null: false t.integer "statuses_count", default: 0, null: false
t.integer "followers_count", default: 0, null: false t.integer "followers_count", default: 0, null: false
t.integer "following_count", default: 0, null: false t.integer "following_count", default: 0, null: false
t.datetime "last_webfingered_at"
t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin
t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", using: :btree t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", using: :btree
t.index ["url"], name: "index_accounts_on_url", using: :btree t.index ["url"], name: "index_accounts_on_url", using: :btree
@ -244,6 +244,7 @@ ActiveRecord::Schema.define(version: 20170414080609) do
t.boolean "reply", default: false t.boolean "reply", default: false
t.integer "favourites_count", default: 0, null: false t.integer "favourites_count", default: 0, null: false
t.integer "reblogs_count", default: 0, null: false t.integer "reblogs_count", default: 0, null: false
t.string "language", default: "en", null: false
t.index ["account_id"], name: "index_statuses_on_account_id", using: :btree t.index ["account_id"], name: "index_statuses_on_account_id", using: :btree
t.index ["in_reply_to_id"], name: "index_statuses_on_in_reply_to_id", using: :btree t.index ["in_reply_to_id"], name: "index_statuses_on_in_reply_to_id", using: :btree
t.index ["reblog_of_id"], name: "index_statuses_on_reblog_of_id", using: :btree t.index ["reblog_of_id"], name: "index_statuses_on_reblog_of_id", using: :btree

Loading…
Cancel
Save