diff --git a/docs/mkdocs/docs/api/arctic_uri.md b/docs/mkdocs/docs/api/arctic_uri.md
index 7e820007a9..bc413ad254 100644
--- a/docs/mkdocs/docs/api/arctic_uri.md
+++ b/docs/mkdocs/docs/api/arctic_uri.md
@@ -42,8 +42,9 @@ Additional options specific for ArcticDB:
| Container | Azure container for blobs |
| Path_prefix | Path within Azure container to use for data storage |
| CA_cert_path | (Non-Windows platform only) Azure CA certificate path. If not set, default path will be used. Note: For Linux distribution, default path is set to `/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem`. If the certificate cannot be found in the provided path, an Azure exception with no meaningful error code will be thrown. For more details, please see [here](https://github.com/Azure/azure-sdk-for-cpp/issues/4738). For example, `Failed to iterate azure blobs 'C' 0:`.
Default certificate path in various Linux distributions:
`/etc/ssl/certs/ca-certificates.crt` for Debian/Ubuntu/Gentoo etc.
`/etc/pki/tls/certs/ca-bundle.crt` for Fedora/RHEL 6
`/etc/ssl/ca-bundle.pem` for OpenSUSE
`/etc/pki/tls/cacert.pem` for OpenELEC
`/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem` for CentOS/RHEL 7
`/etc/ssl/cert.pem` for Alpine Linux |
+| CA_cert_dir | (Non-Windows platform only) Azure CA certificate directory. Sets the `CURLOPT_CAPATH` option in the Azure SDK's libcurl backend. If not set, Python's `ssl.get_default_verify_paths().capath` will be used. Certificates in the directory can only be used if their corresponding hash files exist (see https://www.openssl.org/docs/man1.0.2/man3/SSL_CTX_load_verify_locations.html). |
-For Windows user, `CA_cert_path` cannot be set. Please set CA certificate related option on Windows setting.
+For Windows users, `CA_cert_path` and `CA_cert_dir` cannot be set. Please configure CA certificates through the Windows settings instead.
For details, you may refer to https://learn.microsoft.com/en-us/skype-sdk/sdn/articles/installing-the-trusted-root-certificate
Exception: Azure exceptions message always ends with `{AZURE_SDK_HTTP_STATUS_CODE}:{AZURE_SDK_REASON_PHRASE}`.
diff --git a/python/arcticdb/storage_fixtures/azure.py b/python/arcticdb/storage_fixtures/azure.py
index c7a34e485b..bddf579ba9 100644
--- a/python/arcticdb/storage_fixtures/azure.py
+++ b/python/arcticdb/storage_fixtures/azure.py
@@ -12,7 +12,7 @@
from tempfile import mkdtemp
from .api import *
-from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree
+from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree, get_ca_cert_for_testing
from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap
from arcticdb.version_store.helper import add_azure_library_to_env
@@ -40,13 +40,14 @@ def _set_uri_and_client(self, auth: str):
f = self.factory
self.arctic_uri = (
- f"azure://DefaultEndpointsProtocol=http;{auth};BlobEndpoint={f.endpoint_root}/{f.account_name};"
- f"Container={self.container};CA_cert_path={f.ca_cert_path}"
+ f"azure://DefaultEndpointsProtocol={f.http_protocol};{auth};BlobEndpoint={f.endpoint_root}/{f.account_name};"
+ f"Container={self.container};CA_cert_path={f.client_cert_file}"
)
+ # CA_cert_dir is skipped on purpose; it will be tested manually in other tests
# The retry_policy instance will be modified by the pipeline, so cannot be constant
policy = {"connection_timeout": 1, "read_timeout": 2, "retry_policy": LinearRetry(retry_total=3, backoff=1)}
- self.client = ContainerClient.from_connection_string(self.arctic_uri, self.container, **policy)
+ self.client = ContainerClient.from_connection_string(self.arctic_uri, self.container, **policy, connection_verify=f.client_cert_file)
# add connection_verify=False to bypass ssl checking
def __init__(self, factory: "AzuriteStorageFixtureFactory") -> None:
@@ -93,7 +94,7 @@ def create_test_cfg(self, lib_name: str) -> EnvironmentConfigsMap:
env_name=Defaults.ENV,
container_name=self.container,
endpoint=self.arctic_uri,
- ca_cert_path=self.factory.ca_cert_path,
+ ca_cert_path=self.factory.client_cert_file,
with_prefix=False, # to allow azure_store_factory reuse_name to work correctly
)
return cfg
@@ -122,22 +123,19 @@ def copy_underlying_objects_to(self, destination: "AzureContainer"):
class AzuriteStorageFixtureFactory(StorageFixtureFactory):
- host = "127.0.0.1"
+ host = "localhost"
# Per https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string#configure-a-connection-string-for-azurite
account_name = "devstoreaccount1"
account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
- # Default cert path is used; May run into problem on Linux's non RHEL distribution
- # See more on https://github.com/man-group/ArcticDB/issues/514
- ca_cert_path = ""
-
enforcing_permissions = False
"""Set to True to create AzureContainer with SAS authentication"""
- def __init__(self, port=0, working_dir: Optional[str] = None):
+ def __init__(self, port=0, working_dir: Optional[str] = None, use_ssl: bool = True):
+ self.http_protocol = "https" if use_ssl else "http"
self.port = port or get_ephemeral_port(0)
- self.endpoint_root = f"http://{self.host}:{self.port}"
+ self.endpoint_root = f"{self.http_protocol}://{self.host}:{self.port}"
self.working_dir = str(working_dir) if working_dir else mkdtemp(suffix="AzuriteStorageFixtureFactory")
def __str__(self):
@@ -145,6 +143,15 @@ def __str__(self):
def _safe_enter(self):
args = f"{shutil.which('azurite')} --blobPort {self.port} --blobHost {self.host} --queuePort 0 --tablePort 0"
+ if self.http_protocol == "https":
+ self.ca, self.key_file, self.cert_file, self.client_cert_file = get_ca_cert_for_testing(self.working_dir)
+ self.client_cert_dir = self.working_dir
+ args += f" --key {self.key_file} --cert {self.cert_file}"
+ else:
+ self.key_file = ""
+ self.cert_file = ""
+ self.client_cert_file = ""
+ self.client_cert_dir = ""
self._p = GracefulProcessUtils.start(args, cwd=self.working_dir)
wait_for_server_to_come_up(self.endpoint_root, "azurite", self._p)
return self
diff --git a/python/arcticdb/storage_fixtures/s3.py b/python/arcticdb/storage_fixtures/s3.py
index 7a5bf2e85b..2d7537e221 100644
--- a/python/arcticdb/storage_fixtures/s3.py
+++ b/python/arcticdb/storage_fixtures/s3.py
@@ -22,7 +22,7 @@
from typing import NamedTuple, Optional, Any, Type
from .api import *
-from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree
+from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree, get_ca_cert_for_testing
from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap
from arcticdb.version_store.helper import add_s3_library_to_env
@@ -282,21 +282,8 @@ def _start_server(self):
self.ssl = self.http_protocol == "https" # In real world, using https protocol doesn't necessarily mean ssl will be verified
if self.http_protocol == "https":
- self.key_file = os.path.join(self.working_dir, "key.pem")
- self.cert_file = os.path.join(self.working_dir, "cert.pem")
- self.client_cert_file = os.path.join(self.working_dir, "client.pem")
- ca = trustme.CA()
- server_cert = ca.issue_cert("localhost")
- server_cert.private_key_pem.write_to_path(self.key_file)
- server_cert.cert_chain_pems[0].write_to_path(self.cert_file)
- ca.cert_pem.write_to_path(self.client_cert_file)
+ self.ca, self.key_file, self.cert_file, self.client_cert_file = get_ca_cert_for_testing(self.working_dir)
self.client_cert_dir = self.working_dir
- # Create the sym link for curl CURLOPT_CAPATH option; rehash only available on openssl >=1.1.1
- subprocess.run(
- f'ln -s "{self.client_cert_file}" "$(openssl x509 -hash -noout -in "{self.client_cert_file}")".0',
- cwd=self.working_dir,
- shell=True,
- )
else:
self.key_file = ""
self.cert_file = ""
diff --git a/python/arcticdb/storage_fixtures/utils.py b/python/arcticdb/storage_fixtures/utils.py
index 86e36246fe..efc5fa8171 100644
--- a/python/arcticdb/storage_fixtures/utils.py
+++ b/python/arcticdb/storage_fixtures/utils.py
@@ -136,3 +136,21 @@ def safer_rmtree(fixture, path):
time.sleep(1)
with handler: # Even with ignore_errors=True, rmtree might still throw on Windows....
shutil.rmtree(path, ignore_errors=True)
+
+
+def get_ca_cert_for_testing(working_dir):
+ key_file = os.path.join(working_dir, "key.pem")
+ cert_file = os.path.join(working_dir, "cert.pem")
+ client_cert_file = os.path.join(working_dir, "client.pem")
+ ca = trustme.CA()
+ server_cert = ca.issue_cert("localhost")
+ server_cert.private_key_pem.write_to_path(key_file)
+ server_cert.cert_chain_pems[0].write_to_path(cert_file)
+ ca.cert_pem.write_to_path(client_cert_file)
+ # Create the symlink for curl's CURLOPT_CAPATH option; rehash is only available on openssl >=1.1.1
+ subprocess.run(
+ f'ln -s "{client_cert_file}" "$(openssl x509 -hash -noout -in "{client_cert_file}")".0',
+ cwd=working_dir,
+ shell=True,
+ )
+ return ca, key_file, cert_file, client_cert_file # Need to keep ca alive to authenticate the cert
\ No newline at end of file