Add cache for OEmbed endpoints to avoid extra HTTP requests (#12403)

* add youtube oembed endpoint

* add check for oembed endpoint

* change unless for a more readable if

* clear blank lines

* endpoint via https

* Fix string literal in condition

* use cache for endpoints

* use cache for endpoints

* clean up and adding check

* clean up and remove redundant return

* add html check

* add false to return

* use double quotes

* use double quotes

* Clean up
This commit is contained in:
Eugen Rochko 2019-11-17 18:40:33 +01:00 committed by GitHub
parent d13e680f74
commit 357a2e5564
3 changed files with 64 additions and 8 deletions

View file

@ -39,6 +39,12 @@ class FetchLinkCardService < BaseService
def process_url def process_url
@card ||= PreviewCard.new(url: @url) @card ||= PreviewCard.new(url: @url)
attempt_oembed || attempt_opengraph
end
def html
return @html if defined?(@html)
Request.new(:get, @url).perform do |res| Request.new(:get, @url).perform do |res|
if res.code == 200 && res.mime_type == 'text/html' if res.code == 200 && res.mime_type == 'text/html'
@html = res.body_with_limit @html = res.body_with_limit
@ -48,10 +54,6 @@ class FetchLinkCardService < BaseService
@html_charset = nil @html_charset = nil
end end
end end
return if @html.nil?
attempt_oembed || attempt_opengraph
end end
def attach_card def attach_card
@ -89,11 +91,16 @@ class FetchLinkCardService < BaseService
def attempt_oembed def attempt_oembed
service = FetchOEmbedService.new service = FetchOEmbedService.new
embed = service.call(@url, html: @html) url_domain = Addressable::URI.parse(@url).normalized_host
url = Addressable::URI.parse(service.endpoint_url) cached_endpoint = Rails.cache.read("oembed_endpoint:#{url_domain}")
embed = service.call(@url, cached_endpoint: cached_endpoint) unless cached_endpoint.nil?
embed ||= service.call(@url, html: html) unless html.nil?
return false if embed.nil? return false if embed.nil?
url = Addressable::URI.parse(service.endpoint_url)
@card.type = embed[:type] @card.type = embed[:type]
@card.title = embed[:title] || '' @card.title = embed[:title] || ''
@card.author_name = embed[:author_name] || '' @card.author_name = embed[:author_name] || ''
@ -127,6 +134,8 @@ class FetchLinkCardService < BaseService
end end
def attempt_opengraph def attempt_opengraph
return if html.nil?
detector = CharlockHolmes::EncodingDetector.new detector = CharlockHolmes::EncodingDetector.new
detector.strip_tags = true detector.strip_tags = true

View file

@ -1,13 +1,20 @@
# frozen_string_literal: true # frozen_string_literal: true
class FetchOEmbedService class FetchOEmbedService
ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze
attr_reader :url, :options, :format, :endpoint_url attr_reader :url, :options, :format, :endpoint_url
def call(url, options = {}) def call(url, options = {})
@url = url @url = url
@options = options @options = options
if @options[:cached_endpoint]
parse_cached_endpoint!
else
discover_endpoint! discover_endpoint!
end
fetch! fetch!
end end
@ -32,10 +39,32 @@ class FetchOEmbedService
return if @endpoint_url.blank? return if @endpoint_url.blank?
@endpoint_url = (Addressable::URI.parse(@url) + @endpoint_url).to_s @endpoint_url = (Addressable::URI.parse(@url) + @endpoint_url).to_s
cache_endpoint!
rescue Addressable::URI::InvalidURIError rescue Addressable::URI::InvalidURIError
@endpoint_url = nil @endpoint_url = nil
end end
# Restores the oEmbed endpoint and format from the :cached_endpoint option
# instead of discovering them from the page.
#
# Leaves @endpoint_url and @format untouched when the cached entry lacks
# either its endpoint template or its format.
def parse_cached_endpoint!
  entry    = @options[:cached_endpoint]
  template = entry[:endpoint]
  fmt      = entry[:format]

  # Both parts are required to rebuild a usable endpoint.
  return unless !template.nil? && !fmt.nil?

  # Expand the stored template with the URL currently being processed.
  expanded = Addressable::Template.new(template).expand(url: @url)

  @endpoint_url = expanded.to_s
  @format       = fmt
end
# Stores the discovered oEmbed endpoint in the Rails cache as a template keyed
# by the normalized host of @url, so that parse_cached_endpoint! can expand it
# again for a later request.
#
# The template is produced by replacing the form-encoded @url inside
# @endpoint_url with the `{url}` placeholder. If the endpoint does not contain
# that encoded URL, no placeholder can be inserted and the stored value would be
# a fixed endpoint that every other URL on the same host would reuse; nothing is
# cached in that case.
def cache_endpoint!
  encoded_url = URI.encode_www_form_component(@url)

  # Only endpoints that can be turned into a template are safe to share per host.
  return unless @endpoint_url.include?(encoded_url)

  url_domain = Addressable::URI.parse(@url).normalized_host

  endpoint_hash = {
    endpoint: @endpoint_url.gsub(encoded_url, '{url}'),
    format: @format,
  }

  Rails.cache.write("oembed_endpoint:#{url_domain}", endpoint_hash, expires_in: ENDPOINT_CACHE_EXPIRES_IN)
end
def fetch! def fetch!
return if @endpoint_url.blank? return if @endpoint_url.blank?

View file

@ -113,6 +113,24 @@ describe FetchOEmbedService, type: :service do
end end
# Covers the path where the caller supplies a :cached_endpoint option, so the
# service is expected to skip fetching the original page.
context 'when endpoint is cached' do
before do
# Stub the oEmbed endpoint itself; the body comes from the
# oembed_json_empty.html fixture and is served with a text/html content type.
stub_request(:get, 'http://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=dqwpQarrDwk').to_return(
status: 200,
headers: { 'Content-Type': 'text/html' },
body: request_fixture('oembed_json_empty.html')
)
end
it 'returns new provider without fetching original URL first' do
# The cached entry holds an endpoint template with a {url} placeholder plus the format.
subject.call('https://www.youtube.com/watch?v=dqwpQarrDwk', cached_endpoint: { endpoint: 'http://www.youtube.com/oembed?format=json&url={url}', format: :json })
# The page URL must not have been requested.
expect(a_request(:get, 'https://www.youtube.com/watch?v=dqwpQarrDwk')).to_not have_been_made
# The placeholder is expected to be filled with the percent-encoded page URL.
expect(subject.endpoint_url).to eq 'http://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DdqwpQarrDwk'
expect(subject.format).to eq :json
# The only request expected is the one to the expanded endpoint.
expect(a_request(:get, 'http://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DdqwpQarrDwk')).to have_been_made
end
end
context 'when status code is not 200' do context 'when status code is not 200' do
before do before do
stub_request(:get, 'https://host.test/oembed.html').to_return( stub_request(:get, 'https://host.test/oembed.html').to_return(