From dd81016b05ccae1dbaf3b8f4365e8a54fff75aa4 Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Thu, 27 Jan 2022 08:42:07 +0100 Subject: [PATCH] [fix] certificate retrieval In order to grade TLS certificates with CryptCheck searxstats has to retrieve the certificates. Previously this was done by monkey patching httpx.backends.asyncio.AsyncioBackend. The backend parameter for AsyncClient has however been removed in httpx 0.12.0[1]. Since monkey patching private APIs isn't a good idea anyway this commit instead monkey patches the public API of the ssl.SSLContext class of the standard library. [1]: https://github.com/encode/httpx/issues/782 Fixes #89. --- requirements.txt | 2 +- searxstats/common/http.py | 6 +-- searxstats/common/ssl_info.py | 74 +++++++++++++---------------------- 3 files changed, 30 insertions(+), 52 deletions(-) diff --git a/requirements.txt b/requirements.txt index ad8bf6c..99308a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -pyOpenSSL==19.1.0 +pyOpenSSL==21.0.0 httpx==0.11.0 brotlipy==0.7.0 uvloop==0.14.0 diff --git a/searxstats/common/http.py b/searxstats/common/http.py index ad42b13..b4134e4 100644 --- a/searxstats/common/http.py +++ b/searxstats/common/http.py @@ -12,7 +12,7 @@ from .utils import exception_to_str from .queuecalls import UseQueue from .memoize import Memoize -from .ssl_info import get_httpx_backend +from .ssl_info import SSL_CONTEXT from ..config import TOR_HTTP_PROXY if not sys.version_info.major == 3 and sys.version_info.minor >= 7: @@ -66,9 +66,7 @@ async def new_client(*args, **kwargs): network_type = kwargs['network_type'] kwargs['proxies'] = NETWORK_PROXIES.get(network_type, None) del kwargs['network_type'] - if 'backend' not in kwargs: - kwargs['backend'] = get_httpx_backend() - async with httpx.AsyncClient(*args, **kwargs) as session: + async with httpx.AsyncClient(*args, backend='asyncio', verify=SSL_CONTEXT) as session: session._network_type = network_type # pylint: disable=protected-access yield session diff --git a/searxstats/common/ssl_info.py b/searxstats/common/ssl_info.py index 8e676f8..a637d5b 100644 --- a/searxstats/common/ssl_info.py +++ b/searxstats/common/ssl_info.py @@ -1,7 +1,5 @@ import ssl -import httpx -import httpx.config -import httpx.backends.asyncio +from typing import Dict from OpenSSL.crypto import load_certificate, FILETYPE_ASN1 @@ -47,56 +45,38 @@ def update_obj_with_bin(cert_obj, cert_bin): cert_obj['subject']['altName'] = str(ex) -class SslInfo: +SSL_CONTEXT = ssl.create_default_context() - __slots__ = ['_ssl_info'] +_ssl_objects: Dict[str, ssl.SSLObject] = {} - def __init__(self): - self._ssl_info = dict() +_wrap_bio = SSL_CONTEXT.wrap_bio - def parse_sslobject(self, hostname: str, sslobj: ssl.SSLObject): - if sslobj is None: - return - if hostname not in self._ssl_info: - cert_dict = sslobj.getpeercert(binary_form=False) - cert_bin = sslobj.getpeercert(binary_form=True) - # make cert_obj using cert_dict and cert_bin - cert_obj = cert_to_obj(cert_dict) - if cert_bin is not None and 'sha256' not in cert_obj: - update_obj_with_bin(cert_obj, cert_bin) - # store values - self._ssl_info[hostname] = { - 'version': sslobj.version(), - 'certificate': cert_obj - } - def get(self, hostname: str): - return self._ssl_info.get(hostname, {}) +def patched_wrap_bio(incoming: ssl.MemoryBIO, outgoing: ssl.MemoryBIO, server_hostname: str, **kwargs) -> ssl.SSLObject: + global _ssl_objects + ssl_object = _wrap_bio(incoming, outgoing, server_hostname=server_hostname, **kwargs) + _ssl_objects[server_hostname] = ssl_object + return ssl_object -class AsyncioBackendLogCert(httpx.backends.asyncio.AsyncioBackend): - - __slots__ = ['_sslinfo'] - - def __init__(self, sslinfo: SslInfo): - super().__init__() - self._sslinfo = sslinfo - - async def open_tcp_stream(self, hostname, port, ssl_context, timeout): - value = await super().open_tcp_stream(hostname, port, ssl_context, timeout) - sslobj = value.stream_reader._transport.get_extra_info('ssl_object') # pylint: disable=protected-access - self._sslinfo.parse_sslobject(hostname, sslobj) - return value - - -SSLINFO = SslInfo() - - -def get_httpx_backend(): - global SSLINFO # pylint: disable=global-statement - return httpx.backends.asyncio.AsyncioBackend() +# we monkey patch SSL_CONTEXT to store SSLObjects in _ssl_objects +# (subclassing ssl.SSLContext for some reason didn't work reliably) +SSL_CONTEXT.wrap_bio = patched_wrap_bio def get_ssl_info(hostname): - global SSLINFO # pylint: disable=global-statement - return SSLINFO.get(hostname) + global _ssl_objects # pylint: disable=global-statement + ssl_object = _ssl_objects.get(hostname) + if ssl_object: + cert_dict = ssl_object.getpeercert(binary_form=False) + cert_bin = ssl_object.getpeercert(binary_form=True) + # make cert_obj using cert_dict and cert_bin + cert_obj = cert_to_obj(cert_dict) + if cert_bin is not None and 'sha256' not in cert_obj: + update_obj_with_bin(cert_obj, cert_bin) + return { + 'version': ssl_object.version(), + 'certificate': cert_obj + } + else: + return {}