From b0ffc58e02f33f6aa480f008b74495601d988ce1 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 5 Jun 2024 15:43:45 -0400 Subject: [PATCH] curl_httpclient,http1connection: Prohibit CR and LF in headers libcurl does not check for CR and LF in headers, making this the application's responsibility. However, Tornado's other HTTP interfaces check for linefeeds so we should do the same here so that switching between the simple and curl http clients does not introduce header injection vulnerabilties. http1connection previously checked only for LF in headers (alone or in a CRLF pair). It now prohibits bare CR as well, following the requirement in RFC 9112. --- tornado/curl_httpclient.py | 20 ++++++++++++-------- tornado/http1connection.py | 6 ++++-- tornado/test/httpclient_test.py | 16 ++++++++++++++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/tornado/curl_httpclient.py b/tornado/curl_httpclient.py index 23320e4822..397c3a9752 100644 --- a/tornado/curl_httpclient.py +++ b/tornado/curl_httpclient.py @@ -19,6 +19,7 @@ import functools import logging import pycurl +import re import threading import time from io import BytesIO @@ -44,6 +45,8 @@ curl_log = logging.getLogger("tornado.curl_httpclient") +CR_OR_LF_RE = re.compile(b"\r|\n") + class CurlAsyncHTTPClient(AsyncHTTPClient): def initialize( # type: ignore @@ -347,14 +350,15 @@ def _curl_setup_request( if "Pragma" not in request.headers: request.headers["Pragma"] = "" - curl.setopt( - pycurl.HTTPHEADER, - [ - b"%s: %s" - % (native_str(k).encode("ASCII"), native_str(v).encode("ISO8859-1")) - for k, v in request.headers.get_all() - ], - ) + encoded_headers = [ + b"%s: %s" + % (native_str(k).encode("ASCII"), native_str(v).encode("ISO8859-1")) + for k, v in request.headers.get_all() + ] + for line in encoded_headers: + if CR_OR_LF_RE.search(line): + raise ValueError("Illegal characters in header (CR or LF): %r" % line) + curl.setopt(pycurl.HTTPHEADER, encoded_headers) curl.setopt( pycurl.HEADERFUNCTION, diff --git a/tornado/http1connection.py b/tornado/http1connection.py index ca50e8ff55..3bdffac115 100644 --- a/tornado/http1connection.py +++ b/tornado/http1connection.py @@ -38,6 +38,8 @@ from typing import cast, Optional, Type, Awaitable, Callable, Union, Tuple +CR_OR_LF_RE = re.compile(b"\r|\n") + class _QuietException(Exception): def __init__(self) -> None: @@ -453,8 +455,8 @@ def write_headers( ) lines.extend(line.encode("latin1") for line in header_lines) for line in lines: - if b"\n" in line: - raise ValueError("Newline in header: " + repr(line)) + if CR_OR_LF_RE.search(line): + raise ValueError("Illegal characters (CR or LF) in header: %r" % line) future = None if self.stream.closed(): future = self._write_future = Future() diff --git a/tornado/test/httpclient_test.py b/tornado/test/httpclient_test.py index 31a1916199..17291f8f2e 100644 --- a/tornado/test/httpclient_test.py +++ b/tornado/test/httpclient_test.py @@ -725,6 +725,22 @@ def test_error_after_cancel(self): if el.logged_stack: break + def test_header_crlf(self): + # Ensure that the client doesn't allow CRLF injection in headers. RFC 9112 section 2.2 + # prohibits a bare CR specifically and "a recipient MAY recognize a single LF as a line + # terminator" so we check each character separately as well as the (redundant) CRLF pair. + for header, name in [ + ("foo\rbar:", "cr"), + ("foo\nbar:", "lf"), + ("foo\r\nbar:", "crlf"), + ]: + with self.subTest(name=name, position="value"): + with self.assertRaises(ValueError): + self.fetch("/hello", headers={"foo": header}) + with self.subTest(name=name, position="key"): + with self.assertRaises(ValueError): + self.fetch("/hello", headers={header: "foo"}) + class RequestProxyTest(unittest.TestCase): def test_request_set(self):