Skip to content

Commit

Permalink
Snapshot
Browse files Browse the repository at this point in the history
  • Loading branch information
phoebusm committed Apr 12, 2024
1 parent fddb0e3 commit 5c4924d
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 28 deletions.
3 changes: 2 additions & 1 deletion docs/mkdocs/docs/api/arctic_uri.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ Additional options specific for ArcticDB:
| Container | Azure container for blobs |
| Path_prefix | Path within Azure container to use for data storage |
| CA_cert_path | (Non-Windows platform only) Azure CA certificate path. If not set, default path will be used. Note: For Linux distribution, default path is set to `/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem`. If the certificate cannot be found in the provided path, an Azure exception with no meaningful error code will be thrown. For more details, please see [here](https://github.com/Azure/azure-sdk-for-cpp/issues/4738). For example, `Failed to iterate azure blobs 'C' 0:`.<br><br>Default certificate path in various Linux distributions:<br>`/etc/ssl/certs/ca-certificates.crt` for Debian/Ubuntu/Gentoo etc.<br>`/etc/pki/tls/certs/ca-bundle.crt` for Fedora/RHEL 6<br>`/etc/ssl/ca-bundle.pem` for OpenSUSE<br>`/etc/pki/tls/cacert.pem` for OpenELEC<br>`/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem` for CentOS/RHEL 7<br>`/etc/ssl/cert.pem` for Alpine Linux |
| CA_cert_dir | (Non-Windows platform only) Azure CA certificate directory. It sets the ``CURLOPT_CAPATH`` option in the Azure SDK's libcurl backend. If not set, Python's ``ssl.get_default_verify_paths().capath`` will be used. Certificates in the directory can only be used if their corresponding hash files exist (see https://www.openssl.org/docs/man1.0.2/man3/SSL_CTX_load_verify_locations.html). |

For Windows user, `CA_cert_path` cannot be set. Please set CA certificate related option on Windows setting.
For Windows users, `CA_cert_path` and `CA_cert_dir` cannot be set. Please configure CA certificate related options in the Windows settings instead.
For details, you may refer to https://learn.microsoft.com/en-us/skype-sdk/sdn/articles/installing-the-trusted-root-certificate

Exception: Azure exceptions message always ends with `{AZURE_SDK_HTTP_STATUS_CODE}:{AZURE_SDK_REASON_PHRASE}`.
Expand Down
31 changes: 19 additions & 12 deletions python/arcticdb/storage_fixtures/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from tempfile import mkdtemp

from .api import *
from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree
from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree, get_ca_cert_for_testing
from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap
from arcticdb.version_store.helper import add_azure_library_to_env

Expand Down Expand Up @@ -40,13 +40,14 @@ def _set_uri_and_client(self, auth: str):

f = self.factory
self.arctic_uri = (
f"azure://DefaultEndpointsProtocol=http;{auth};BlobEndpoint={f.endpoint_root}/{f.account_name};"
f"Container={self.container};CA_cert_path={f.ca_cert_path}"
f"azure://DefaultEndpointsProtocol={f.http_protocol};{auth};BlobEndpoint={f.endpoint_root}/{f.account_name};"
f"Container={self.container};CA_cert_path={f.client_cert_file}"
)
# CA_cert_dir is skipped on purpose; it will be tested manually in other tests

# The retry_policy instance will be modified by the pipeline, so cannot be constant
policy = {"connection_timeout": 1, "read_timeout": 2, "retry_policy": LinearRetry(retry_total=3, backoff=1)}
self.client = ContainerClient.from_connection_string(self.arctic_uri, self.container, **policy)
self.client = ContainerClient.from_connection_string(self.arctic_uri, self.container, **policy, connection_verify=f.client_cert_file)
# add connection_verify=False to bypass ssl checking

def __init__(self, factory: "AzuriteStorageFixtureFactory") -> None:
Expand Down Expand Up @@ -93,7 +94,7 @@ def create_test_cfg(self, lib_name: str) -> EnvironmentConfigsMap:
env_name=Defaults.ENV,
container_name=self.container,
endpoint=self.arctic_uri,
ca_cert_path=self.factory.ca_cert_path,
ca_cert_path=self.factory.client_cert_file,
with_prefix=False, # to allow azure_store_factory reuse_name to work correctly
)
return cfg
Expand Down Expand Up @@ -122,29 +123,35 @@ def copy_underlying_objects_to(self, destination: "AzureContainer"):


class AzuriteStorageFixtureFactory(StorageFixtureFactory):
host = "127.0.0.1"
host = "localhost"

# Per https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string#configure-a-connection-string-for-azurite
account_name = "devstoreaccount1"
account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="

# Default cert path is used; May run into problem on Linux's non RHEL distribution
# See more on https://github.com/man-group/ArcticDB/issues/514
ca_cert_path = ""

enforcing_permissions = False
"""Set to True to create AzureContainer with SAS authentication"""

def __init__(self, port=0, working_dir: Optional[str] = None):
def __init__(self, port=0, working_dir: Optional[str] = None, use_ssl: bool = True):
self.http_protocol = "https" if use_ssl else "http"
self.port = port or get_ephemeral_port(0)
self.endpoint_root = f"http://{self.host}:{self.port}"
self.endpoint_root = f"{self.http_protocol}://{self.host}:{self.port}"
self.working_dir = str(working_dir) if working_dir else mkdtemp(suffix="AzuriteStorageFixtureFactory")

def __str__(self):
    """Return a short label containing the factory's port and working directory."""
    port, directory = self.port, self.working_dir
    return f"AzuriteStorageFixtureFactory[port={port},dir={directory}]"

def _safe_enter(self):
    """Launch an ``azurite`` process for this factory and wait for it to come up.

    The command line binds the blob service to ``self.host``/``self.port`` and passes port 0
    for the queue and table services. When ``self.http_protocol`` is ``"https"``, test
    certificates are generated in ``self.working_dir`` and handed to azurite via
    ``--key``/``--cert``; otherwise every certificate-related attribute is set to ``""``.

    Returns:
        ``self``, after ``wait_for_server_to_come_up`` has returned.
    """
    command = f"{shutil.which('azurite')} --blobPort {self.port} --blobHost {self.host} --queuePort 0 --tablePort 0"
    if self.http_protocol != "https":
        # Plain HTTP: no TLS material exists, so expose empty placeholders
        self.key_file = self.cert_file = ""
        self.client_cert_file = self.client_cert_dir = ""
    else:
        generated = get_ca_cert_for_testing(self.working_dir)
        self.ca, self.key_file, self.cert_file, self.client_cert_file = generated
        self.client_cert_dir = self.working_dir
        command += f" --key {self.key_file} --cert {self.cert_file}"
    self._p = GracefulProcessUtils.start(command, cwd=self.working_dir)
    wait_for_server_to_come_up(self.endpoint_root, "azurite", self._p)
    return self
Expand Down
17 changes: 2 additions & 15 deletions python/arcticdb/storage_fixtures/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from typing import NamedTuple, Optional, Any, Type

from .api import *
from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree
from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree, get_ca_cert_for_testing
from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap
from arcticdb.version_store.helper import add_s3_library_to_env

Expand Down Expand Up @@ -282,21 +282,8 @@ def _start_server(self):

self.ssl = self.http_protocol == "https" # In real world, using https protocol doesn't necessarily mean ssl will be verified
if self.http_protocol == "https":
self.key_file = os.path.join(self.working_dir, "key.pem")
self.cert_file = os.path.join(self.working_dir, "cert.pem")
self.client_cert_file = os.path.join(self.working_dir, "client.pem")
ca = trustme.CA()
server_cert = ca.issue_cert("localhost")
server_cert.private_key_pem.write_to_path(self.key_file)
server_cert.cert_chain_pems[0].write_to_path(self.cert_file)
ca.cert_pem.write_to_path(self.client_cert_file)
self.ca, self.key_file, self.cert_file, self.client_cert_file = get_ca_cert_for_testing(self.working_dir)
self.client_cert_dir = self.working_dir
# Create the sym link for curl CURLOPT_CAPATH option; rehash only available on openssl >=1.1.1
subprocess.run(
f'ln -s "{self.client_cert_file}" "$(openssl x509 -hash -noout -in "{self.client_cert_file}")".0',
cwd=self.working_dir,
shell=True,
)
else:
self.key_file = ""
self.cert_file = ""
Expand Down
18 changes: 18 additions & 0 deletions python/arcticdb/storage_fixtures/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,21 @@ def safer_rmtree(fixture, path):
time.sleep(1)
with handler: # Even with ignore_errors=True, rmtree might still throw on Windows....
shutil.rmtree(path, ignore_errors=True)


def get_ca_cert_for_testing(working_dir):
    """Create a throwaway CA and a ``localhost`` server certificate inside ``working_dir``.

    Files written:
      * ``key.pem``    - private key of the issued server certificate
      * ``cert.pem``   - first PEM of the server certificate chain
      * ``client.pem`` - the CA certificate

    A ``<hash>.0`` symlink pointing at ``client.pem`` is also requested in ``working_dir``
    through a shell one-liner (``ln`` + ``openssl``), for curl's ``CURLOPT_CAPATH`` option.
    The outcome of that command is not checked.

    Returns:
        ``(ca, key_file, cert_file, client_cert_file)``. The ``ca`` object is returned because
        it needs to be kept alive to authenticate the cert.
    """
    key_file, cert_file, client_cert_file = (
        os.path.join(working_dir, pem_name) for pem_name in ("key.pem", "cert.pem", "client.pem")
    )

    ca = trustme.CA()
    issued = ca.issue_cert("localhost")
    issued.private_key_pem.write_to_path(key_file)
    issued.cert_chain_pems[0].write_to_path(cert_file)
    ca.cert_pem.write_to_path(client_cert_file)

    # Create the sym link for curl CURLOPT_CAPATH option; rehash only available on openssl >=1.1.1
    link_command = f'ln -s "{client_cert_file}" "$(openssl x509 -hash -noout -in "{client_cert_file}")".0'
    subprocess.run(link_command, cwd=working_dir, shell=True)

    return ca, key_file, cert_file, client_cert_file

0 comments on commit 5c4924d

Please sign in to comment.