Language improvements, replace whatlanguage with CLD (#2753)

* add failing en specs

* add cld2 gem

* Replace WhatLanguage with CLD
th-downstream
Matt Jankowski 8 years ago committed by Eugen Rochko
parent 5cb8f0c0b2
commit 9db85085a9

@ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1'
gem 'paperclip-av-transcoder' gem 'paperclip-av-transcoder'
gem 'addressable' gem 'addressable'
gem 'cld2', require: 'cld'
gem 'devise' gem 'devise'
gem 'devise-two-factor' gem 'devise-two-factor'
gem 'doorkeeper' gem 'doorkeeper'
@ -56,7 +57,6 @@ gem 'statsd-instrument'
gem 'twitter-text' gem 'twitter-text'
gem 'tzinfo-data' gem 'tzinfo-data'
gem 'webpacker', '~>1.2' gem 'webpacker', '~>1.2'
gem 'whatlanguage'
# For some reason the view specs start failing without this # For some reason the view specs start failing without this
gem 'react-rails' gem 'react-rails'

@ -102,6 +102,8 @@ GEM
rack-test (>= 0.5.4) rack-test (>= 0.5.4)
xpath (~> 2.0) xpath (~> 2.0)
chunky_png (1.3.8) chunky_png (1.3.8)
cld2 (1.0.3)
ffi (~> 1.9.3)
climate_control (0.1.0) climate_control (0.1.0)
cocaine (0.5.8) cocaine (0.5.8)
climate_control (>= 0.0.3, < 1.0) climate_control (>= 0.0.3, < 1.0)
@ -153,6 +155,7 @@ GEM
faker (1.7.3) faker (1.7.3)
i18n (~> 0.5) i18n (~> 0.5)
fast_blank (1.0.0) fast_blank (1.0.0)
ffi (1.9.18)
fuubar (2.2.0) fuubar (2.2.0)
rspec-core (~> 3.0) rspec-core (~> 3.0)
ruby-progressbar (~> 1.4) ruby-progressbar (~> 1.4)
@ -463,7 +466,6 @@ GEM
websocket-driver (0.6.5) websocket-driver (0.6.5)
websocket-extensions (>= 0.1.0) websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.2) websocket-extensions (0.1.2)
whatlanguage (1.0.6)
xpath (2.0.0) xpath (2.0.0)
nokogiri (~> 1.3) nokogiri (~> 1.3)
@ -484,6 +486,7 @@ DEPENDENCIES
capistrano-rbenv capistrano-rbenv
capistrano-yarn capistrano-yarn
capybara capybara
cld2
devise devise
devise-two-factor devise-two-factor
doorkeeper doorkeeper
@ -549,7 +552,6 @@ DEPENDENCIES
uglifier (>= 1.3.0) uglifier (>= 1.3.0)
webmock webmock
webpacker (~> 1.2) webpacker (~> 1.2)
whatlanguage
RUBY VERSION RUBY VERSION
ruby 2.4.1p111 ruby 2.4.1p111

@ -9,11 +9,23 @@ class LanguageDetector
end end
def to_iso_s def to_iso_s
WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym detected_language_code || default_locale.to_sym
end end
private private
# Language code of the detection result, converted to a Symbol.
#
# Returns nil when `detected_language_reliable?` is falsy, so the caller
# (`to_iso_s`) can fall back to its default locale.
def detected_language_code
  return unless detected_language_reliable?

  detected_language[:code].to_sym
end
# Runs CLD language detection over the text with URLs removed and caches
# the result in @_detected_language, so repeated calls on the same
# instance reuse the first truthy result instead of detecting again.
def detected_language
  return @_detected_language if @_detected_language

  @_detected_language = CLD.detect_language(text_without_urls)
end
# Returns the :reliable entry of the detection result as-is.
# NOTE(review): presumably CLD's own confidence flag (true/false) —
# confirm against the cld2 gem documentation.
def detected_language_reliable?
  detection = detected_language
  detection[:reliable]
end
def text_without_urls def text_without_urls
text.dup.tap do |new_text| text.dup.tap do |new_text|
URI.extract(new_text).each do |url| URI.extract(new_text).each do |url|

@ -3,11 +3,17 @@ require 'rails_helper'
describe LanguageDetector do describe LanguageDetector do
describe 'to_iso_s' do describe 'to_iso_s' do
it 'detects english language' do it 'detects english language for basic strings' do
string = 'Hello and welcome to mastodon' strings = [
"Hello and welcome to mastodon",
"I'd rather not!",
"a lot of people just want to feel righteous all the time and that's all that matters",
]
strings.each do |string|
result = described_class.new(string).to_iso_s result = described_class.new(string).to_iso_s
expect(result).to eq :en expect(result).to eq(:en), string
end
end end
it 'detects spanish language' do it 'detects spanish language' do
@ -19,15 +25,15 @@ describe LanguageDetector do
describe 'when language can\'t be detected' do describe 'when language can\'t be detected' do
it 'confirm language engine cant detect' do it 'confirm language engine cant detect' do
result = WhatLanguage.new(:all).language_iso('') result = CLD.detect_language('')
expect(result).to be_nil expect(result[:reliable]).to be false
end end
describe 'because of a URL' do describe 'because of a URL' do
it 'uses default locale when sent just a URL' do it 'uses default locale when sent just a URL' do
string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4' string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
wl_result = WhatLanguage.new(:all).language_iso(string) cld_result = CLD.detect_language(string)[:code]
expect(wl_result).not_to eq :en expect(cld_result).not_to eq :en
result = described_class.new(string).to_iso_s result = described_class.new(string).to_iso_s

Loading…
Cancel
Save