diff --git a/docs/mkdocs/docs/api/arctic_uri.md b/docs/mkdocs/docs/api/arctic_uri.md index 7e820007a9..bc413ad254 100644 --- a/docs/mkdocs/docs/api/arctic_uri.md +++ b/docs/mkdocs/docs/api/arctic_uri.md @@ -42,8 +42,9 @@ Additional options specific for ArcticDB: | Container | Azure container for blobs | | Path_prefix | Path within Azure container to use for data storage | | CA_cert_path | (Non-Windows platform only) Azure CA certificate path. If not set, default path will be used. Note: For Linux distribution, default path is set to `/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem`. If the certificate cannot be found in the provided path, an Azure exception with no meaningful error code will be thrown. For more details, please see [here](https://github.com/Azure/azure-sdk-for-cpp/issues/4738). For example, `Failed to iterate azure blobs 'C' 0:`.

Default certificate path in various Linux distributions:
`/etc/ssl/certs/ca-certificates.crt` for Debian/Ubuntu/Gentoo etc.
`/etc/pki/tls/certs/ca-bundle.crt` for Fedora/RHEL 6
`/etc/ssl/ca-bundle.pem` for OpenSUSE
`/etc/pki/tls/cacert.pem` for OpenELEC
`/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem` for CentOS/RHEL 7
`/etc/ssl/cert.pem` for Alpine Linux | +| CA_cert_dir | (Non-Windows platform only) Azure CA certificate directory. It sets the ``CURLOPT_CAPATH`` option in the Azure SDK's libcurl backend. If not set, Python's ``ssl.get_default_verify_paths().capath`` will be used. Certificates can only be used if the corresponding hash files exist (see https://www.openssl.org/docs/man1.0.2/man3/SSL_CTX_load_verify_locations.html) | -For Windows user, `CA_cert_path` cannot be set. Please set CA certificate related option on Windows setting. +For Windows users, neither `CA_cert_path` nor `CA_cert_dir` can be set. Please configure CA certificate related options in the Windows settings instead. For details, you may refer to https://learn.microsoft.com/en-us/skype-sdk/sdn/articles/installing-the-trusted-root-certificate Exception: Azure exceptions message always ends with `{AZURE_SDK_HTTP_STATUS_CODE}:{AZURE_SDK_REASON_PHRASE}`. diff --git a/python/arcticdb/storage_fixtures/azure.py b/python/arcticdb/storage_fixtures/azure.py index c7a34e485b..bddf579ba9 100644 --- a/python/arcticdb/storage_fixtures/azure.py +++ b/python/arcticdb/storage_fixtures/azure.py @@ -12,7 +12,7 @@ from tempfile import mkdtemp from .api import * -from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree +from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree, get_ca_cert_for_testing from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap from arcticdb.version_store.helper import add_azure_library_to_env @@ -40,13 +40,14 @@ def _set_uri_and_client(self, auth: str): f = self.factory self.arctic_uri = ( - f"azure://DefaultEndpointsProtocol=http;{auth};BlobEndpoint={f.endpoint_root}/{f.account_name};" - f"Container={self.container};CA_cert_path={f.ca_cert_path}" + f"azure://DefaultEndpointsProtocol={f.http_protocol};{auth};BlobEndpoint={f.endpoint_root}/{f.account_name};" + f"Container={self.container};CA_cert_path={f.client_cert_file}" ) + # CA_cert_dir is skipped 
on purpose; it will be tested manually in other tests # The retry_policy instance will be modified by the pipeline, so cannot be constant policy = {"connection_timeout": 1, "read_timeout": 2, "retry_policy": LinearRetry(retry_total=3, backoff=1)} - self.client = ContainerClient.from_connection_string(self.arctic_uri, self.container, **policy) + self.client = ContainerClient.from_connection_string(self.arctic_uri, self.container, **policy, connection_verify=f.client_cert_file) # add connection_verify=False to bypass ssl checking def __init__(self, factory: "AzuriteStorageFixtureFactory") -> None: @@ -93,7 +94,7 @@ def create_test_cfg(self, lib_name: str) -> EnvironmentConfigsMap: env_name=Defaults.ENV, container_name=self.container, endpoint=self.arctic_uri, - ca_cert_path=self.factory.ca_cert_path, + ca_cert_path=self.factory.client_cert_file, with_prefix=False, # to allow azure_store_factory reuse_name to work correctly ) return cfg @@ -122,22 +123,19 @@ def copy_underlying_objects_to(self, destination: "AzureContainer"): class AzuriteStorageFixtureFactory(StorageFixtureFactory): - host = "127.0.0.1" + host = "localhost" # Per https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string#configure-a-connection-string-for-azurite account_name = "devstoreaccount1" account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" - # Default cert path is used; May run into problem on Linux's non RHEL distribution - # See more on https://github.com/man-group/ArcticDB/issues/514 - ca_cert_path = "" - enforcing_permissions = False """Set to True to create AzureContainer with SAS authentication""" - def __init__(self, port=0, working_dir: Optional[str] = None): + def __init__(self, port=0, working_dir: Optional[str] = None, use_ssl: bool = True): + self.http_protocol = "https" if use_ssl else "http" self.port = port or get_ephemeral_port(0) - self.endpoint_root = f"http://{self.host}:{self.port}" + 
self.endpoint_root = f"{self.http_protocol}://{self.host}:{self.port}" self.working_dir = str(working_dir) if working_dir else mkdtemp(suffix="AzuriteStorageFixtureFactory") def __str__(self): @@ -145,6 +143,15 @@ def __str__(self): def _safe_enter(self): args = f"{shutil.which('azurite')} --blobPort {self.port} --blobHost {self.host} --queuePort 0 --tablePort 0" + if self.http_protocol == "https": + self.ca, self.key_file, self.cert_file, self.client_cert_file = get_ca_cert_for_testing(self.working_dir) + self.client_cert_dir = self.working_dir + args += f" --key {self.key_file} --cert {self.cert_file}" + else: + self.key_file = "" + self.cert_file = "" + self.client_cert_file = "" + self.client_cert_dir = "" self._p = GracefulProcessUtils.start(args, cwd=self.working_dir) wait_for_server_to_come_up(self.endpoint_root, "azurite", self._p) return self diff --git a/python/arcticdb/storage_fixtures/s3.py b/python/arcticdb/storage_fixtures/s3.py index 7a5bf2e85b..2d7537e221 100644 --- a/python/arcticdb/storage_fixtures/s3.py +++ b/python/arcticdb/storage_fixtures/s3.py @@ -22,7 +22,7 @@ from typing import NamedTuple, Optional, Any, Type from .api import * -from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree +from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree, get_ca_cert_for_testing from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap from arcticdb.version_store.helper import add_s3_library_to_env @@ -282,21 +282,8 @@ def _start_server(self): self.ssl = self.http_protocol == "https" # In real world, using https protocol doesn't necessarily mean ssl will be verified if self.http_protocol == "https": - self.key_file = os.path.join(self.working_dir, "key.pem") - self.cert_file = os.path.join(self.working_dir, "cert.pem") - self.client_cert_file = os.path.join(self.working_dir, "client.pem") - ca = trustme.CA() - server_cert = ca.issue_cert("localhost") - 
server_cert.private_key_pem.write_to_path(self.key_file) - server_cert.cert_chain_pems[0].write_to_path(self.cert_file) - ca.cert_pem.write_to_path(self.client_cert_file) + self.ca, self.key_file, self.cert_file, self.client_cert_file = get_ca_cert_for_testing(self.working_dir) self.client_cert_dir = self.working_dir - # Create the sym link for curl CURLOPT_CAPATH option; rehash only available on openssl >=1.1.1 - subprocess.run( - f'ln -s "{self.client_cert_file}" "$(openssl x509 -hash -noout -in "{self.client_cert_file}")".0', - cwd=self.working_dir, - shell=True, - ) else: self.key_file = "" self.cert_file = "" diff --git a/python/arcticdb/storage_fixtures/utils.py b/python/arcticdb/storage_fixtures/utils.py index 86e36246fe..efc5fa8171 100644 --- a/python/arcticdb/storage_fixtures/utils.py +++ b/python/arcticdb/storage_fixtures/utils.py @@ -136,3 +136,21 @@ def safer_rmtree(fixture, path): time.sleep(1) with handler: # Even with ignore_errors=True, rmtree might still throw on Windows.... shutil.rmtree(path, ignore_errors=True) + + +def get_ca_cert_for_testing(working_dir): + key_file = os.path.join(working_dir, "key.pem") + cert_file = os.path.join(working_dir, "cert.pem") + client_cert_file = os.path.join(working_dir, "client.pem") + ca = trustme.CA() + server_cert = ca.issue_cert("localhost") + server_cert.private_key_pem.write_to_path(key_file) + server_cert.cert_chain_pems[0].write_to_path(cert_file) + ca.cert_pem.write_to_path(client_cert_file) + # Create the sym link for curl CURLOPT_CAPATH option; rehash only available on openssl >=1.1.1 + subprocess.run( + f'ln -s "{client_cert_file}" "$(openssl x509 -hash -noout -in "{client_cert_file}")".0', + cwd=working_dir, + shell=True, + ) + return ca, key_file, cert_file, client_cert_file # Need to keep ca alive to authenticate the cert \ No newline at end of file