Skip to content

Commit

Permalink
Handle incorrectly UTF-8 encoded query and cookie url
Browse files Browse the repository at this point in the history
Use a different exception when this situation is detected:
* Sanitiser::Strategy::SanitisingError
This error is not sent to Sentry.
See: alphagov/govuk_app_config#402
  • Loading branch information
unoduetre committed Oct 25, 2024
1 parent 82da144 commit 53fbd61
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 1 deletion.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ gem "govuk_personalisation"
gem "govuk_publishing_components"
gem "nokogiri"
gem "plek"
gem "rack-utf8_sanitizer"
gem "redis"
gem "sprockets-rails"
gem "terser"
Expand Down
3 changes: 3 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,8 @@ GEM
rack (>= 3.0.0)
rack-test (2.1.0)
rack (>= 1.3)
rack-utf8_sanitizer (1.9.1)
rack (>= 1.0, < 4.0)
rackup (2.1.0)
rack (>= 3)
webrick (~> 1.8)
Expand Down Expand Up @@ -613,6 +615,7 @@ DEPENDENCIES
nokogiri
plek
pry
rack-utf8_sanitizer
rails (= 7.2.1.1)
redis
rubocop-govuk
Expand Down
12 changes: 11 additions & 1 deletion config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
require "active_support/time"
# require "action_cable/engine"
require "rails/test_unit/railtie"
require_relative "../lib/sanitiser/strategy"

# Require the gems listed in Gemfile, including any gems
# you've limited to :test, :development, or :production.
Expand All @@ -27,7 +28,7 @@ class Application < Rails::Application
# Please, add to the `ignore` list any other `lib` subdirectories that do
# not contain `.rb` files, or that should not be reloaded or eager loaded.
# Common ones are `templates`, `generators`, or `middleware`, for example.
config.autoload_lib(ignore: %w[assets tasks])
config.autoload_lib(ignore: %w[assets tasks sanitiser])

# Settings in config/environments/* take precedence over those specified here.
# Application configuration can go into files in config/initializers
Expand All @@ -44,5 +45,14 @@ class Application < Rails::Application

# Use temporary directory for page cache if filesystem is read-only
config.action_controller.page_cache_directory = File.join(ENV["TMPDIR"], "page_cache") if ENV.fetch("USE_TMPDIR_PAGE_CACHE", "false") == "true"

# Protect from "invalid byte sequence in UTF-8" errors,
# when a query or a cookie is a string with incorrect UTF-8 encoding.
config.middleware.insert_before(
Rack::Head,
Rack::UTF8Sanitizer,
only: %w[QUERY_STRING],
strategy: Sanitiser::Strategy,
)
end
end
18 changes: 18 additions & 0 deletions lib/sanitiser/strategy.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
module Sanitiser
class Strategy
class SanitisingError < StandardError; end

class << self
def call(input, sanitize_null_bytes: false)
input
.force_encoding(Encoding::ASCII_8BIT)
.encode!(Encoding::UTF_8)
if sanitize_null_bytes && Rack::UTF8Sanitizer::NULL_BYTE_REGEX.match?(input)
raise NullByteInString
end
rescue StandardError
raise SanitisingError, "Non-UTF-8 (or null) character in the query or in the cookie"
end
end
end
end
84 changes: 84 additions & 0 deletions test/integration/sanitiser_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
require "test_helper"

class SanitiserTest < ActiveSupport::TestCase
include Rack::Test::Methods

def app
Rails.application
end

test "with query being correct percent-encoded UTF-8 string does not raise exception" do
get "/templates/scheduled_maintenance.html.erb?%41"
assert last_response.successful?
end

test "with query being incorrect percent-encoded UTF-8 string raises exception" do
assert_raises(Sanitiser::Strategy::SanitisingError) do
get "/templates/scheduled_maintenance.html.erb?%AD"
end
end

test "with cookie key being correct UTF-8" do
header("Cookie", "\x41=value")
get "/templates/scheduled_maintenance.html.erb"
assert last_response.successful?
end

test "with cookie key being incorrect UTF-8" do
header("Cookie", "\xAD=value")
assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do
get "/templates/scheduled_maintenance.html.erb"
end
end

test "with cookie value being correct UTF-8" do
header("Cookie", "key=\x41")
get "/templates/scheduled_maintenance.html.erb"
assert last_response.successful?
end

test "with cookie value being incorrect UTF-8" do
header("Cookie", "key=\xAD")
assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do
get "/templates/scheduled_maintenance.html.erb"
end
end

test "with cookie path being correct UTF-8" do
header("Cookie", "key=value; Path=/\x41")
get "/templates/scheduled_maintenance.html.erb"
assert last_response.successful?
end

test "with cookie path being incorrect UTF-8" do
header("Cookie", "key=value; Path=/\xAD")
assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do
get "/templates/scheduled_maintenance.html.erb"
end
end

test "with cookie path being correct percent-encoded UTF-8" do
header("Cookie", "key=value; Path=/%41")
get "/templates/scheduled_maintenance.html.erb"
assert last_response.successful?
end

test "with cookie path being incorrect percent-encoded UTF-8" do
header("Cookie", "key=value; Path=/%AD")
assert_raises(Sanitiser::Strategy::SanitisingError) do
get "/templates/scheduled_maintenance.html.erb"
end
end

test "with referrer headerbeing correct percent-encoded UTF-8" do
header("Referer", "http://example.com/?%41")
get "/templates/scheduled_maintenance.html.erb"
assert last_response.successful?
end

test "with referrer header being incorrect percent-encoded UTF-8" do
header("Referer", "http://example.com/?%AD")
get "/templates/scheduled_maintenance.html.erb"
assert last_response.successful?
end
end

0 comments on commit 53fbd61

Please sign in to comment.