Fix request URL normalisation for bare domain and 8-bit characters (#26285)

This commit is contained in:
Christian Schmidt 2023-08-02 19:32:29 +02:00 committed by GitHub
parent 9a214a414e
commit da740ec43c
2 changed files with 68 additions and 9 deletions

View file

@ -76,8 +76,8 @@ class Request
HTTP::URI.new(
scheme: uri.normalized_scheme,
authority: uri.normalized_authority,
path: Addressable::URI.normalize_path(uri.path),
query: uri.query
path: Addressable::URI.normalize_path(encode_non_ascii(uri.path)).presence || '/',
query: encode_non_ascii(uri.query)
)
end
@ -151,6 +151,12 @@ class Request
%w(http https).include?(parsed_url.scheme) && parsed_url.host.present?
end
NON_ASCII_PATTERN = /[^\x00-\x7F]+/
def encode_non_ascii(str)
str&.gsub(NON_ASCII_PATTERN) { |substr| CGI.escape(substr.encode(Encoding::UTF_8)) }
end
def http_client
HTTP.use(:auto_inflate).use(normalize_uri: { normalizer: URI_NORMALIZER }).follow(max_hops: 3)
end

View file

@ -95,6 +95,33 @@ describe Request do
end
end
context 'with bare domain URL' do
let(:url) { 'http://example.com' }
before do
stub_request(:get, 'http://example.com')
end
it 'normalizes path' do
subject.perform do |response|
expect(response.request.uri.path).to eq '/'
end
end
it 'normalizes path used for request signing' do
subject.perform
headers = subject.instance_variable_get(:@headers)
expect(headers[Request::REQUEST_TARGET]).to eq 'get /'
end
it 'normalizes path used in request line' do
subject.perform do |response|
expect(response.request.headline).to eq 'GET / HTTP/1.1'
end
end
end
context 'with unnormalized URL' do
let(:url) { 'HTTP://EXAMPLE.com:80/foo%41%3A?bar=%41%3A#baz' }
@ -114,18 +141,31 @@ describe Request do
end
end
it 'does modify path' do
it 'does not modify path' do
subject.perform do |response|
expect(response.request.uri.path).to eq '/foo%41%3A'
end
end
it 'does modify query string' do
it 'does not modify query string' do
subject.perform do |response|
expect(response.request.uri.query).to eq 'bar=%41%3A'
end
end
it 'does not modify path used for request signing' do
subject.perform
headers = subject.instance_variable_get(:@headers)
expect(headers[Request::REQUEST_TARGET]).to eq 'get /foo%41%3A'
end
it 'does not modify path used in request line' do
subject.perform do |response|
expect(response.request.headline).to eq 'GET /foo%41%3A?bar=%41%3A HTTP/1.1'
end
end
it 'strips fragment' do
subject.perform do |response|
expect(response.request.uri.fragment).to be_nil
@ -134,22 +174,35 @@ describe Request do
end
context 'with non-ASCII URL' do
let(:url) { 'http://éxample.com/föo?bär=1' }
let(:url) { 'http://éxample.com:81/föo?bär=1' }
before do
stub_request(:get, 'http://xn--xample-9ua.com/f%C3%B6o?b%C3%A4r=1')
stub_request(:get, 'http://xn--xample-9ua.com:81/f%C3%B6o?b%C3%A4r=1')
end
it 'IDN-encodes host' do
subject.perform do |response|
expect(response.request.uri.authority).to eq 'xn--xample-9ua.com'
expect(response.request.uri.authority).to eq 'xn--xample-9ua.com:81'
end
end
it 'percent-escapes path and query string' do
it 'IDN-encodes host in Host header' do
subject.perform do |response|
expect(response.request.headers['Host']).to eq 'xn--xample-9ua.com'
end
end
it 'percent-escapes path used for request signing' do
subject.perform
expect(a_request(:get, 'http://xn--xample-9ua.com/f%C3%B6o?b%C3%A4r=1')).to have_been_made
headers = subject.instance_variable_get(:@headers)
expect(headers[Request::REQUEST_TARGET]).to eq 'get /f%C3%B6o'
end
it 'normalizes path used in request line' do
subject.perform do |response|
expect(response.request.headline).to eq 'GET /f%C3%B6o?b%C3%A4r=1 HTTP/1.1'
end
end
end