diff --git a/cpp/CMake/AzureVcpkg.cmake b/cpp/CMake/AzureVcpkg.cmake deleted file mode 100644 index 15388bda2bb..00000000000 --- a/cpp/CMake/AzureVcpkg.cmake +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# SPDX-License-Identifier: MIT - -# We need to know an absolute path to our repo root to do things like referencing ./LICENSE.txt file. -set(AZ_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}/..") - -macro(az_vcpkg_integrate) - message("Vcpkg integrate step.") - # AUTO CMAKE_TOOLCHAIN_FILE: - # User can call `cmake -DCMAKE_TOOLCHAIN_FILE="path_to_the_toolchain"` as the most specific scenario. - # As the last alternative (default case), Azure SDK will automatically clone VCPKG folder and set toolchain from there. - if(NOT DEFINED CMAKE_TOOLCHAIN_FILE) - message("CMAKE_TOOLCHAIN_FILE is not defined. Define it for the user.") - # Set AZURE_SDK_DISABLE_AUTO_VCPKG env var to avoid Azure SDK from cloning and setting VCPKG automatically - # This option delegate package's dependencies installation to user. - if(NOT DEFINED ENV{AZURE_SDK_DISABLE_AUTO_VCPKG}) - message("AZURE_SDK_DISABLE_AUTO_VCPKG is not defined. Fetch a local copy of vcpkg.") - # GET VCPKG FROM SOURCE - # User can set env var AZURE_SDK_VCPKG_COMMIT to pick the VCPKG commit to fetch - set(VCPKG_COMMIT_STRING 94ce0dab56f4d8ba6bd631ba59ed682b02d45c46) # default SDK tested commit - if(DEFINED ENV{AZURE_SDK_VCPKG_COMMIT}) - message("AZURE_SDK_VCPKG_COMMIT is defined. Using that instead of the default.") - set(VCPKG_COMMIT_STRING "$ENV{AZURE_SDK_VCPKG_COMMIT}") # default SDK tested commit - endif() - message("Vcpkg commit string used: ${VCPKG_COMMIT_STRING}") - include(FetchContent) - FetchContent_Declare( - vcpkg - GIT_REPOSITORY https://github.com/microsoft/vcpkg.git - GIT_TAG ${VCPKG_COMMIT_STRING} - ) - FetchContent_GetProperties(vcpkg) - # make sure to pull vcpkg only once. 
- if(NOT vcpkg_POPULATED) - FetchContent_Populate(vcpkg) - endif() - # use the vcpkg source path - set(CMAKE_TOOLCHAIN_FILE "${vcpkg_SOURCE_DIR}/scripts/buildsystems/vcpkg.cmake" CACHE STRING "") - endif() - endif() - - # enable triplet customization - if(DEFINED ENV{VCPKG_DEFAULT_TRIPLET} AND NOT DEFINED VCPKG_TARGET_TRIPLET) - set(VCPKG_TARGET_TRIPLET "$ENV{VCPKG_DEFAULT_TRIPLET}" CACHE STRING "") - endif() -endmacro() - -macro(az_vcpkg_portfile_prep targetName fileName contentToRemove) - # with sdk//vcpkg/ - file(READ "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/${fileName}" fileContents) - - # Windows -> Unix line endings - string(FIND fileContents "\r\n" crLfPos) - - if (crLfPos GREATER -1) - string(REPLACE "\r\n" "\n" fileContents ${fileContents}) - endif() - - # remove comment header - string(REPLACE "${contentToRemove}" "" fileContents ${fileContents}) - - # undo Windows -> Unix line endings (if applicable) - if (crLfPos GREATER -1) - string(REPLACE "\n" "\r\n" fileContents ${fileContents}) - endif() - unset(crLfPos) - - # output to an intermediate location - file (WRITE "${CMAKE_BINARY_DIR}/vcpkg_prep/${targetName}/${fileName}" ${fileContents}) - unset(fileContents) - - # Produce the files to help with the vcpkg release. - # Go to the /out/build//vcpkg directory, and copy (merge) "ports" folder to the vcpkg repo. - # Then, update the portfile.cmake file SHA512 from "1" to the actual hash (a good way to do it is to uninstall a package, - # clean vcpkg/downloads, vcpkg/buildtrees, run "vcpkg install ", and get the SHA from the error message). - configure_file( - "${CMAKE_BINARY_DIR}/vcpkg_prep/${targetName}/${fileName}" - "${CMAKE_BINARY_DIR}/vcpkg/ports/${targetName}-cpp/${fileName}" - @ONLY - ) -endmacro() - -macro(az_vcpkg_export targetName macroNamePart dllImportExportHeaderPath) - foreach(vcpkgFile "vcpkg.json" "portfile.cmake") - az_vcpkg_portfile_prep( - "${targetName}" - "${vcpkgFile}" - "# Copyright (c) Microsoft Corporation. 
All rights reserved.\n# SPDX-License-Identifier: MIT\n\n" - ) - endforeach() - - # Standard names for folders such as "bin", "lib", "include". We could hardcode, but some other libs use it too (curl). - include(GNUInstallDirs) - - # When installing, copy our "inc" directory (headers) to "include" directory at the install location. - install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/inc/azure/" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/azure") - - # Copy license as "copyright" (vcpkg dictates naming and location). - install(FILES "${AZ_ROOT_DIR}/LICENSE.txt" DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" RENAME "copyright") - - # Indicate where to install targets. Mirrors what other ports do. - install( - TARGETS "${targetName}" - EXPORT "${targetName}-cppTargets" - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} # DLLs (if produced by build) go to "/bin" - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} # static .lib files - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} # .lib files for DLL build - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} # headers - ) - - # If building a Windows DLL, patch the dll_import_export.hpp - if(WIN32 AND BUILD_SHARED_LIBS) - add_compile_definitions(AZ_${macroNamePart}_BEING_BUILT) - target_compile_definitions(${targetName} PUBLIC AZ_${macroNamePart}_DLL) - - set(AZ_${macroNamePart}_DLL_INSTALLED_AS_PACKAGE "*/ + 1 /*") - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/inc/${dllImportExportHeaderPath}" - "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderPath}" - @ONLY - ) - unset(AZ_${macroNamePart}_DLL_INSTALLED_AS_PACKAGE) - - get_filename_component(dllImportExportHeaderDir ${dllImportExportHeaderPath} DIRECTORY) - install( - FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderPath}" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderDir}" - ) - unset(dllImportExportHeaderDir) - endif() - - # Export the targets file itself. 
- install( - EXPORT "${targetName}-cppTargets" - DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" - NAMESPACE Azure:: # Not the C++ namespace, but a namespace in terms of cmake. - FILE "${targetName}-cppTargets.cmake" - ) - - # configure_package_config_file(), write_basic_package_version_file() - include(CMakePackageConfigHelpers) - - # Produce package config file. - configure_package_config_file( - "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/Config.cmake.in" - "${targetName}-cppConfig.cmake" - INSTALL_DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" - PATH_VARS - CMAKE_INSTALL_LIBDIR) - - # Produce version file. - write_basic_package_version_file( - "${targetName}-cppConfigVersion.cmake" - VERSION ${AZ_LIBRARY_VERSION} # the version that we extracted from package_version.hpp - COMPATIBILITY SameMajorVersion - ) - - # Install package config and version files. - install( - FILES - "${CMAKE_CURRENT_BINARY_DIR}/${targetName}-cppConfig.cmake" - "${CMAKE_CURRENT_BINARY_DIR}/${targetName}-cppConfigVersion.cmake" - DESTINATION - "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" # to shares/ - ) - - # Export all the installs above as package. 
- export(PACKAGE "${targetName}-cpp") -endmacro() \ No newline at end of file diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index be04efd6f48..d97a9b7dc53 100755 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -100,9 +100,7 @@ include(PythonUtils) # Must be called before Pybind (third_party) to override it add_subdirectory(third_party) add_subdirectory(proto) -if(NOT ${ARCTICDB_USING_CONDA}) - include(AzureVcpkg) #AzureVcpkg.cmake is from https://github.com/Azure/azure-sdk-for-cpp/blob/main/cmake-modules/AzureVcpkg.cmake, commit ada77b3 -endif() + python_utils_dump_vars_if_enabled("After Pybind") python_utils_check_include_dirs("accepted by pybind") diff --git a/cpp/arcticdb/storage/azure/azure_real_client.cpp b/cpp/arcticdb/storage/azure/azure_real_client.cpp index 60a8101d787..b3404f7ade0 100644 --- a/cpp/arcticdb/storage/azure/azure_real_client.cpp +++ b/cpp/arcticdb/storage/azure/azure_real_client.cpp @@ -32,9 +32,14 @@ container_client(BlobContainerClient::CreateFromConnectionString(conf.endpoint() Azure::Storage::Blobs::BlobClientOptions RealAzureClient::get_client_options(const Config &conf) { BlobClientOptions client_options; - if (!conf.ca_cert_path().empty()) {//WARNING: Setting ca_cert_path will force Azure sdk uses libcurl as backend support, instead of winhttp + if (!conf.ca_cert_path().empty() || !conf.ca_cert_dir().empty()) {//WARNING: Setting ca_cert_path or ca_cert_dir will force Azure sdk uses libcurl as backend support, instead of winhttp Azure::Core::Http::CurlTransportOptions curl_transport_options; - curl_transport_options.CAInfo = conf.ca_cert_path(); + if (!conf.ca_cert_path().empty()) { + curl_transport_options.CAInfo = conf.ca_cert_path(); + } + if (!conf.ca_cert_dir().empty()) { + curl_transport_options.CAPath = conf.ca_cert_dir(); + } client_options.Transport.Transport = std::make_shared(curl_transport_options); } return client_options; diff --git a/cpp/arcticdb/storage/azure/azure_storage.cpp 
b/cpp/arcticdb/storage/azure/azure_storage.cpp index 498cbe351d6..b0e79b94126 100644 --- a/cpp/arcticdb/storage/azure/azure_storage.cpp +++ b/cpp/arcticdb/storage/azure/azure_storage.cpp @@ -350,6 +350,10 @@ AzureStorage::AzureStorage(const LibraryPath &library_path, OpenMode mode, const ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using default CA cert path"); else ARCTICDB_RUNTIME_DEBUG(log::storage(), "CA cert path: {}", conf.ca_cert_path()); + if (conf.ca_cert_dir().empty()) + ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using default CA cert directory"); + else + ARCTICDB_RUNTIME_DEBUG(log::storage(), "CA cert directory: {}", conf.ca_cert_dir()); ARCTICDB_RUNTIME_DEBUG(log::storage(), "Connecting to Azure Blob Storage: {} Container: {}", conf.endpoint(), conf.container_name()); if (!conf.prefix().empty()) { diff --git a/cpp/arcticdb/storage/azure/azure_storage.hpp b/cpp/arcticdb/storage/azure/azure_storage.hpp index 7dc04f28ed1..f6da42e51f7 100644 --- a/cpp/arcticdb/storage/azure/azure_storage.hpp +++ b/cpp/arcticdb/storage/azure/azure_storage.hpp @@ -61,8 +61,6 @@ class AzureStorage final : public Storage { unsigned int request_timeout_; Azure::Storage::Blobs::UploadBlockBlobFromOptions upload_option_; Azure::Storage::Blobs::DownloadBlobToOptions download_option_; - - Azure::Storage::Blobs::BlobClientOptions get_client_options(const Config &conf); }; inline arcticdb::proto::storage::VariantStorage pack_config(const std::string &container_name) { diff --git a/cpp/arcticdb/storage/python_bindings.cpp b/cpp/arcticdb/storage/python_bindings.cpp index c573852554c..f4b5ee192e6 100644 --- a/cpp/arcticdb/storage/python_bindings.cpp +++ b/cpp/arcticdb/storage/python_bindings.cpp @@ -124,7 +124,8 @@ void register_bindings(py::module& storage) { .def(py::init<>()) .def_property("container_name", &AzureOverride::container_name, &AzureOverride::set_container_name) .def_property("endpoint", &AzureOverride::endpoint, &AzureOverride::set_endpoint) - .def_property("ca_cert_path", 
&AzureOverride::ca_cert_path, &AzureOverride::set_ca_cert_path); + .def_property("ca_cert_path", &AzureOverride::ca_cert_path, &AzureOverride::set_ca_cert_path) + .def_property("ca_cert_dir", &AzureOverride::ca_cert_dir, &AzureOverride::set_ca_cert_dir); py::class_(storage, "LmdbOverride") .def(py::init<>()) diff --git a/cpp/arcticdb/storage/storage_override.hpp b/cpp/arcticdb/storage/storage_override.hpp index bb4d2ccb924..40f42d23051 100644 --- a/cpp/arcticdb/storage/storage_override.hpp +++ b/cpp/arcticdb/storage/storage_override.hpp @@ -127,6 +127,7 @@ class AzureOverride { std::string container_name_; std::string endpoint_; std::string ca_cert_path_; + std::string ca_cert_dir_; public: std::string container_name() const { @@ -153,6 +154,14 @@ class AzureOverride { ca_cert_path_ = ca_cert_path; } + std::string ca_cert_dir() const { + return ca_cert_dir_; + } + + void set_ca_cert_dir(std::string_view ca_cert_dir){ + ca_cert_dir_ = ca_cert_dir; + } + void modify_storage_config(arcticdb::proto::storage::VariantStorage& storage) const { if(storage.config().Is()) { arcticdb::proto::azure_storage::Config azure_storage; @@ -161,6 +170,7 @@ class AzureOverride { azure_storage.set_container_name(container_name_); azure_storage.set_endpoint(endpoint_); azure_storage.set_ca_cert_path(ca_cert_path_); + azure_storage.set_ca_cert_dir(ca_cert_dir_); util::pack_to_any(azure_storage, *storage.mutable_config()); } diff --git a/cpp/proto/arcticc/pb2/azure_storage.proto b/cpp/proto/arcticc/pb2/azure_storage.proto index 9d47677e039..84abb3ab2fa 100644 --- a/cpp/proto/arcticc/pb2/azure_storage.proto +++ b/cpp/proto/arcticc/pb2/azure_storage.proto @@ -18,4 +18,5 @@ message Config { string prefix = 5; string ca_cert_path = 6; bool use_mock_storage_for_testing = 7; + string ca_cert_dir = 8; } diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index d532d8305b7..d8b2806cde9 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -61,6 +61,7 @@ "name": "arrow", "default-features": false }, + 
"azure-core-cpp", "azure-identity-cpp", "azure-storage-blobs-cpp", "rocksdb", @@ -69,6 +70,7 @@ "overrides": [ { "name": "openssl", "version-string": "1.1.1n#1" }, { "name": "aws-sdk-cpp", "version": "1.11.201" }, + { "name": "azure-core-cpp", "version": "1.11.2" }, { "name": "aws-c-s3", "version": "0.3.24" }, { "name": "bitmagic", "version": "7.12.3" }, { "name": "boost-algorithm", "version": "1.80.0#1" }, diff --git a/docs/mkdocs/docs/api/arctic_uri.md b/docs/mkdocs/docs/api/arctic_uri.md index 7e820007a9f..986612972da 100644 --- a/docs/mkdocs/docs/api/arctic_uri.md +++ b/docs/mkdocs/docs/api/arctic_uri.md @@ -41,10 +41,11 @@ Additional options specific for ArcticDB: |---------------|---------------| | Container | Azure container for blobs | | Path_prefix | Path within Azure container to use for data storage | -| CA_cert_path | (Non-Windows platform only) Azure CA certificate path. If not set, default path will be used. Note: For Linux distribution, default path is set to `/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem`. If the certificate cannot be found in the provided path, an Azure exception with no meaningful error code will be thrown. For more details, please see [here](https://github.com/Azure/azure-sdk-for-cpp/issues/4738). For example, `Failed to iterate azure blobs 'C' 0:`.

Default certificate path in various Linux distributions:
`/etc/ssl/certs/ca-certificates.crt` for Debian/Ubuntu/Gentoo etc.
`/etc/pki/tls/certs/ca-bundle.crt` for Fedora/RHEL 6
`/etc/ssl/ca-bundle.pem` for OpenSUSE
`/etc/pki/tls/cacert.pem` for OpenELEC
`/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem` for CentOS/RHEL 7
`/etc/ssl/cert.pem` for Alpine Linux | +| CA_cert_path | (Linux platform only) Azure CA certificate path. If not set, python ``ssl.get_default_verify_paths().cafile`` will be used. If the certificate cannot be found in the provided path, an Azure exception with no meaningful error code will be thrown. For more details, please see [here](https://github.com/Azure/azure-sdk-for-cpp/issues/4738). For example, `Failed to iterate azure blobs 'C' 0:`.| +| CA_cert_dir | (Linux platform only) Azure CA certificate directory. If not set, python ``ssl.get_default_verify_paths().capath`` will be used. Certificates can only be used if corresponding hash files exist (https://www.openssl.org/docs/man1.0.2/man3/SSL_CTX_load_verify_locations.html). If the certificate cannot be found in the provided path, an Azure exception with no meaningful error code will be thrown. For more details, please see [here](https://github.com/Azure/azure-sdk-for-cpp/issues/4738). For example, `Failed to iterate azure blobs 'C' 0:`.| -For Windows user, `CA_cert_path` cannot be set. Please set CA certificate related option on Windows setting. -For details, you may refer to https://learn.microsoft.com/en-us/skype-sdk/sdn/articles/installing-the-trusted-root-certificate +On non-Linux platforms, `CA_cert_path` and `CA_cert_dir` cannot be set. Please configure CA certificates via the corresponding OS settings instead. +For Windows, you may refer to https://learn.microsoft.com/en-us/skype-sdk/sdn/articles/installing-the-trusted-root-certificate Exception: Azure exceptions message always ends with `{AZURE_SDK_HTTP_STATUS_CODE}:{AZURE_SDK_REASON_PHRASE}`. 
diff --git a/python/arcticdb/adapters/azure_library_adapter.py b/python/arcticdb/adapters/azure_library_adapter.py index 05d1eee07e3..6d59e97209a 100644 --- a/python/arcticdb/adapters/azure_library_adapter.py +++ b/python/arcticdb/adapters/azure_library_adapter.py @@ -8,6 +8,7 @@ import re import time from typing import Optional +import ssl import platform from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap, LibraryConfig @@ -26,7 +27,10 @@ @dataclass class ParsedQuery: Path_prefix: Optional[str] = None - CA_cert_path: str = "" + # winhttp is used as Azure backend support on Windows by default; winhttp itself maintains ca cert. + # The options should be left empty else libcurl will be used on Windows + CA_cert_path: Optional[str] = "" # CURLOPT_CAINFO in curl + CA_cert_dir: Optional[str] = "" # CURLOPT_CAPATH in curl Container: Optional[str] = None @@ -54,9 +58,16 @@ def __init__(self, uri: str, encoding_version: EncodingVersion, *args, **kwargs) ] ) self._container = self._query_params.Container - if platform.system() == "Windows" and self._query_params.CA_cert_path: - raise ValueError(f"CA_cert_path cannot be set on Windows platform") + if platform.system() != "Linux" and (self._query_params.CA_cert_path or self._query_params.CA_cert_dir): + raise ValueError("You have provided `ca_cert_path` or `ca_cert_dir` in the URI which is only supported on Linux. 
" \ + "Remove the setting in the connection URI and use your operating system defaults.") self._ca_cert_path = self._query_params.CA_cert_path + self._ca_cert_dir = self._query_params.CA_cert_dir + if not self._ca_cert_path and not self._ca_cert_dir and platform.system() == "Linux": + if ssl.get_default_verify_paths().cafile is not None: + self._ca_cert_path = ssl.get_default_verify_paths().cafile + if ssl.get_default_verify_paths().capath is not None: + self._ca_cert_dir = ssl.get_default_verify_paths().capath self._encoding_version = encoding_version super().__init__(uri, self._encoding_version) @@ -80,6 +91,7 @@ def config_library(self): endpoint=self._endpoint, with_prefix=with_prefix, ca_cert_path=self._ca_cert_path, + ca_cert_dir=self._ca_cert_dir, ) lib = NativeVersionStore.create_store_from_config( @@ -111,6 +123,8 @@ def get_storage_override(self) -> AzureOverride: azure_override.endpoint = self._endpoint if self._ca_cert_path: azure_override.ca_cert_path = self._ca_cert_path + if self._ca_cert_dir: + azure_override.ca_cert_dir = self._ca_cert_dir storage_override = StorageOverride() storage_override.set_azure_override(azure_override) @@ -138,6 +152,7 @@ def add_library_to_env(self, env_cfg: EnvironmentConfigsMap, name: str): endpoint=self._endpoint, with_prefix=with_prefix, ca_cert_path=self._ca_cert_path, + ca_cert_dir=self._ca_cert_dir, ) @property diff --git a/python/arcticdb/storage_fixtures/api.py b/python/arcticdb/storage_fixtures/api.py index d48fb02cd46..f920b9880e2 100644 --- a/python/arcticdb/storage_fixtures/api.py +++ b/python/arcticdb/storage_fixtures/api.py @@ -29,6 +29,7 @@ class ArcticUriFields(Enum): PASSWORD = "PASSWORD" BUCKET = "BUCKET" CA_PATH = "CA_PATH" + SSL = "SSL" def __str__(self): return self.value @@ -149,7 +150,6 @@ def replace_uri_field(cls, uri: str, field: ArcticUriFields, replacement: str, s """start & end are these regex groups: 1=field name, 2=field value, 3=optional field separator""" regex = cls._FIELD_REGEX[field] 
match = regex.search(uri) - assert match, f"{uri} does not have {field}" return f"{uri[:match.start(start)]}{replacement}{uri[match.end(end):]}" diff --git a/python/arcticdb/storage_fixtures/azure.py b/python/arcticdb/storage_fixtures/azure.py index c7a34e485be..f9a802e937e 100644 --- a/python/arcticdb/storage_fixtures/azure.py +++ b/python/arcticdb/storage_fixtures/azure.py @@ -12,9 +12,10 @@ from tempfile import mkdtemp from .api import * -from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree +from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree, get_ca_cert_for_testing from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap from arcticdb.version_store.helper import add_azure_library_to_env +from tests.util.mark import SSL_TEST_ENABLED # All storage client libraries to be imported on-demand to speed up start-up of ad-hoc test runs if TYPE_CHECKING: @@ -40,13 +41,14 @@ def _set_uri_and_client(self, auth: str): f = self.factory self.arctic_uri = ( - f"azure://DefaultEndpointsProtocol=http;{auth};BlobEndpoint={f.endpoint_root}/{f.account_name};" - f"Container={self.container};CA_cert_path={f.ca_cert_path}" + f"azure://DefaultEndpointsProtocol={f.http_protocol};{auth};BlobEndpoint={f.endpoint_root}/{f.account_name};" + f"Container={self.container};CA_cert_path={f.client_cert_file}" ) + # CA_cert_dir is skipped on purpose; It will be tested manually in other tests # The retry_policy instance will be modified by the pipeline, so cannot be constant policy = {"connection_timeout": 1, "read_timeout": 2, "retry_policy": LinearRetry(retry_total=3, backoff=1)} - self.client = ContainerClient.from_connection_string(self.arctic_uri, self.container, **policy) + self.client = ContainerClient.from_connection_string(self.arctic_uri, self.container, **policy, connection_verify=f.client_cert_file) # add connection_verify=False to bypass ssl checking def __init__(self, factory: 
"AzuriteStorageFixtureFactory") -> None: @@ -93,7 +95,7 @@ def create_test_cfg(self, lib_name: str) -> EnvironmentConfigsMap: env_name=Defaults.ENV, container_name=self.container, endpoint=self.arctic_uri, - ca_cert_path=self.factory.ca_cert_path, + ca_cert_path=self.factory.client_cert_file, with_prefix=False, # to allow azure_store_factory reuse_name to work correctly ) return cfg @@ -122,22 +124,19 @@ def copy_underlying_objects_to(self, destination: "AzureContainer"): class AzuriteStorageFixtureFactory(StorageFixtureFactory): - host = "127.0.0.1" + host = "localhost" # Per https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string#configure-a-connection-string-for-azurite account_name = "devstoreaccount1" account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" - # Default cert path is used; May run into problem on Linux's non RHEL distribution - # See more on https://github.com/man-group/ArcticDB/issues/514 - ca_cert_path = "" - enforcing_permissions = False """Set to True to create AzureContainer with SAS authentication""" - def __init__(self, port=0, working_dir: Optional[str] = None): + def __init__(self, port=0, working_dir: Optional[str] = None, use_ssl: bool = True): + self.http_protocol = "https" if use_ssl else "http" self.port = port or get_ephemeral_port(0) - self.endpoint_root = f"http://{self.host}:{self.port}" + self.endpoint_root = f"{self.http_protocol}://{self.host}:{self.port}" self.working_dir = str(working_dir) if working_dir else mkdtemp(suffix="AzuriteStorageFixtureFactory") def __str__(self): @@ -145,6 +144,16 @@ def __str__(self): def _safe_enter(self): args = f"{shutil.which('azurite')} --blobPort {self.port} --blobHost {self.host} --queuePort 0 --tablePort 0" + if SSL_TEST_ENABLED: + self.ca, self.key_file, self.cert_file, self.client_cert_file = get_ca_cert_for_testing(self.working_dir) + else: + self.ca = "" + self.key_file = "" + self.cert_file = "" + 
self.client_cert_file = "" + self.client_cert_dir = self.working_dir + if self.http_protocol == "https": + args += f" --key {self.key_file} --cert {self.cert_file}" self._p = GracefulProcessUtils.start(args, cwd=self.working_dir) wait_for_server_to_come_up(self.endpoint_root, "azurite", self._p) return self diff --git a/python/arcticdb/storage_fixtures/s3.py b/python/arcticdb/storage_fixtures/s3.py index 7a5bf2e85ba..b9ad3873381 100644 --- a/python/arcticdb/storage_fixtures/s3.py +++ b/python/arcticdb/storage_fixtures/s3.py @@ -22,9 +22,10 @@ from typing import NamedTuple, Optional, Any, Type from .api import * -from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree +from .utils import get_ephemeral_port, GracefulProcessUtils, wait_for_server_to_come_up, safer_rmtree, get_ca_cert_for_testing from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap from arcticdb.version_store.helper import add_s3_library_to_env +from tests.util.mark import SSL_TEST_ENABLED # All storage client libraries to be imported on-demand to speed up start-up of ad-hoc test runs @@ -40,6 +41,8 @@ class S3Bucket(StorageFixture): ArcticUriFields.BUCKET: re.compile("^s3://[^:]+(:)([^?]+)"), ArcticUriFields.USER: re.compile("[?&](access=)([^&]+)(&?)"), ArcticUriFields.PASSWORD: re.compile("[?&](secret=)([^&]+)(&?)"), + ArcticUriFields.CA_PATH: re.compile("[?&](CA_cert_path=)([^&]*)(&?)"), + ArcticUriFields.SSL: re.compile("[?&](ssl=)([^&]+)(&?)"), } key: Key @@ -281,27 +284,14 @@ def _start_server(self): self._iam_endpoint = f"{self.http_protocol}://localhost:{port}" self.ssl = self.http_protocol == "https" # In real world, using https protocol doesn't necessarily mean ssl will be verified - if self.http_protocol == "https": - self.key_file = os.path.join(self.working_dir, "key.pem") - self.cert_file = os.path.join(self.working_dir, "cert.pem") - self.client_cert_file = os.path.join(self.working_dir, "client.pem") - ca = trustme.CA() - server_cert = 
ca.issue_cert("localhost") - server_cert.private_key_pem.write_to_path(self.key_file) - server_cert.cert_chain_pems[0].write_to_path(self.cert_file) - ca.cert_pem.write_to_path(self.client_cert_file) - self.client_cert_dir = self.working_dir - # Create the sym link for curl CURLOPT_CAPATH option; rehash only available on openssl >=1.1.1 - subprocess.run( - f'ln -s "{self.client_cert_file}" "$(openssl x509 -hash -noout -in "{self.client_cert_file}")".0', - cwd=self.working_dir, - shell=True, - ) + if SSL_TEST_ENABLED: + self.ca, self.key_file, self.cert_file, self.client_cert_file = get_ca_cert_for_testing(self.working_dir) else: + self.ca = "" self.key_file = "" self.cert_file = "" self.client_cert_file = "" - self.client_cert_dir = "" + self.client_cert_dir = self.working_dir self._p = multiprocessing.Process( target=self.run_server, diff --git a/python/arcticdb/storage_fixtures/utils.py b/python/arcticdb/storage_fixtures/utils.py index 86e36246fe2..efc5fa8171b 100644 --- a/python/arcticdb/storage_fixtures/utils.py +++ b/python/arcticdb/storage_fixtures/utils.py @@ -136,3 +136,21 @@ def safer_rmtree(fixture, path): time.sleep(1) with handler: # Even with ignore_errors=True, rmtree might still throw on Windows.... 
shutil.rmtree(path, ignore_errors=True) + + +def get_ca_cert_for_testing(working_dir): + key_file = os.path.join(working_dir, "key.pem") + cert_file = os.path.join(working_dir, "cert.pem") + client_cert_file = os.path.join(working_dir, "client.pem") + ca = trustme.CA() + server_cert = ca.issue_cert("localhost") + server_cert.private_key_pem.write_to_path(key_file) + server_cert.cert_chain_pems[0].write_to_path(cert_file) + ca.cert_pem.write_to_path(client_cert_file) + # Create the sym link for curl CURLOPT_CAPATH option; rehash only available on openssl >=1.1.1 + subprocess.run( + f'ln -s "{client_cert_file}" "$(openssl x509 -hash -noout -in "{client_cert_file}")".0', + cwd=working_dir, + shell=True, + ) + return ca, key_file, cert_file, client_cert_file # Need to keep ca alive to authenticate the cert \ No newline at end of file diff --git a/python/arcticdb/version_store/helper.py b/python/arcticdb/version_store/helper.py index fe674f18217..bcf3ca1d877 100644 --- a/python/arcticdb/version_store/helper.py +++ b/python/arcticdb/version_store/helper.py @@ -323,6 +323,7 @@ def get_azure_proto( endpoint, with_prefix: Optional[Union[bool, str]] = True, ca_cert_path: str = "", + ca_cert_dir: str = "", ): env = cfg.env_by_id[env_name] azure = AzureConfig() @@ -338,6 +339,7 @@ def get_azure_proto( else: azure.prefix = lib_name azure.ca_cert_path = ca_cert_path + azure.ca_cert_dir = ca_cert_dir sid, storage = get_storage_for_lib_name(azure.prefix, env) storage.config.Pack(azure, type_url_prefix="cxx.arctic.org") @@ -353,6 +355,7 @@ def add_azure_library_to_env( description: Optional[bool] = None, with_prefix: Optional[Union[bool, str]] = True, ca_cert_path: str = "", + ca_cert_dir: str = "", ): env = cfg.env_by_id[env_name] sid, _ = get_azure_proto( @@ -363,6 +366,7 @@ def add_azure_library_to_env( endpoint=endpoint, with_prefix=with_prefix, ca_cert_path=ca_cert_path, + ca_cert_dir=ca_cert_dir, ) _add_lib_desc_to_env(env, lib_name, sid, description) diff --git 
a/python/tests/conftest.py b/python/tests/conftest.py index b3e0c9b473f..5024b540b67 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -36,7 +36,8 @@ AZURE_TESTS_MARK, MONGO_TESTS_MARK, REAL_S3_TESTS_MARK, - S3_SSL_TEST_ENABLED, + SSL_TEST_ENABLED, + ARCTICDB_USING_CONDA ) # region =================================== Misc. Constants & Setup ==================================== @@ -112,7 +113,7 @@ def lmdb_library(lmdb_storage, lib_name): @pytest.fixture(scope="session") def s3_storage_factory(): - with MotoS3StorageFixtureFactory(use_ssl=S3_SSL_TEST_ENABLED) as f: + with MotoS3StorageFixtureFactory(use_ssl=SSL_TEST_ENABLED) as f: yield f @@ -168,7 +169,7 @@ def real_s3_storage(real_s3_storage_factory): @pytest.fixture(scope="session") def azurite_storage_factory(): - with AzuriteStorageFixtureFactory() as f: + with AzuriteStorageFixtureFactory(use_ssl=False) as f: yield f @@ -178,6 +179,18 @@ def azurite_storage(azurite_storage_factory: AzuriteStorageFixtureFactory): yield f +@pytest.fixture(scope="session") +def azurite_ssl_storage_factory(): + with AzuriteStorageFixtureFactory(use_ssl=True) as f: + yield f + + +@pytest.fixture +def azurite_ssl_storage(azurite_ssl_storage_factory: AzuriteStorageFixtureFactory): + with azurite_ssl_storage_factory.create_fixture() as f: + yield f + + @pytest.fixture(scope="session") def mongo_server(): with auto_detect_server() as s: diff --git a/python/tests/integration/arcticdb/test_arctic.py b/python/tests/integration/arcticdb/test_arctic.py index 00014668545..a02c68a0bd4 100644 --- a/python/tests/integration/arcticdb/test_arctic.py +++ b/python/tests/integration/arcticdb/test_arctic.py @@ -15,7 +15,7 @@ from datetime import datetime, timezone from typing import List from enum import Enum -import platform +import ssl from arcticdb_ext.exceptions import InternalException, UserInputException from arcticdb_ext.storage import NoDataFoundException @@ -36,7 +36,7 @@ ArcticInvalidApiUsageException, ) -from 
tests.util.mark import AZURE_TESTS_MARK, MONGO_TESTS_MARK, REAL_S3_TESTS_MARK, S3_SSL_TESTS_MARK +from tests.util.mark import AZURE_TESTS_MARK, MONGO_TESTS_MARK, REAL_S3_TESTS_MARK, SSL_TESTS_MARK, SSL_TEST_ENABLED class ParameterDisplayStatus(Enum): NOT_SHOW = 1 @@ -45,40 +45,50 @@ class ParameterDisplayStatus(Enum): parameter_display_status = [ParameterDisplayStatus.NOT_SHOW, ParameterDisplayStatus.DISABLE, ParameterDisplayStatus.ENABLE] -@S3_SSL_TESTS_MARK +@pytest.mark.parametrize('storages, delimiter', [('s3_storage', '&'), pytest.param('s3_no_ssl_storage', '&', marks=SSL_TESTS_MARK), + pytest.param('azurite_storage', ';', marks=AZURE_TESTS_MARK), + pytest.param('azurite_ssl_storage', ';', marks=[AZURE_TESTS_MARK, SSL_TESTS_MARK])]) @pytest.mark.parametrize('client_cert_file', parameter_display_status) -@pytest.mark.parametrize('ssl', parameter_display_status) -def test_s3_no_ssl_verification(s3_no_ssl_storage, client_cert_file, ssl): - uri = s3_no_ssl_storage.arctic_uri - if ssl == ParameterDisplayStatus.DISABLE: - uri += "&ssl=False" - elif ssl == ParameterDisplayStatus.ENABLE: - uri += "&ssl=True" +@pytest.mark.parametrize('client_cert_dir', parameter_display_status) +@pytest.mark.parametrize('ssl_setting', parameter_display_status) +def test_s3_ssl_verification(request, monkeypatch, storages, delimiter, client_cert_file, client_cert_dir, ssl_setting): + if 'azurite' in storages and ssl_setting != ParameterDisplayStatus.NOT_SHOW: + pytest.skip("Skipping Azurite test with ssl verification as its for S3 only.") + if not SSL_TEST_ENABLED and (client_cert_dir == ParameterDisplayStatus.ENABLE or client_cert_file == ParameterDisplayStatus.ENABLE): + pytest.skip("Skipping SSL test as it is not enabled.") + + storage = request.getfixturevalue(storages) + # Leaving ca file and ca dir unset will fallback to using os default setting, + # which is different from the test environment + class DefaultSetting: + def __init__(self): + self.cafile = 
storage.factory.client_cert_file + self.capath = storage.factory.client_cert_dir + default_setting = DefaultSetting() + monkeypatch.setattr("ssl.get_default_verify_paths", lambda: default_setting) + # Clear default setting in the uri + uri = storage.arctic_uri + if SSL_TEST_ENABLED: + uri = storage.replace_uri_field(uri, ArcticUriFields.CA_PATH, "", start=1, end=3).rstrip(delimiter) + if storages == 's3_storage': + uri = storage.replace_uri_field(uri, ArcticUriFields.SSL, "", start=1, end=3).rstrip(delimiter) + + # http server with ssl verification doesn't make sense but it is permitted due to historical reason + if "s3" in storages: + if ssl_setting == ParameterDisplayStatus.DISABLE: + uri += f"{delimiter}ssl=False" + elif ssl_setting == ParameterDisplayStatus.ENABLE: + uri += f"{delimiter}ssl=True" if client_cert_file == ParameterDisplayStatus.DISABLE: - uri += "&CA_cert_path=" + uri += f"{delimiter}CA_cert_path=" elif client_cert_file == ParameterDisplayStatus.ENABLE: - uri += f"&CA_cert_path={s3_no_ssl_storage.factory.client_cert_file}" - ac = Arctic(uri) - lib = ac.create_library("test") - lib.write("sym", pd.DataFrame()) - - -@S3_SSL_TESTS_MARK -def test_s3_ca_directory_ssl_verification(s3_storage): - uri = f"s3s://{s3_storage.factory.host}:{s3_storage.bucket}?access={s3_storage.key.id}" \ - f"&secret={s3_storage.key.secret}&CA_cert_path=&CA_cert_dir={s3_storage.factory.client_cert_dir}&ssl=True" - if s3_storage.factory.port: - uri += f"&port={s3_storage.factory.port}" - ac = Arctic(uri) - lib = ac.create_library("test") - lib.write("sym", pd.DataFrame()) - - -@S3_SSL_TESTS_MARK -def test_s3_https_backend_without_ssl_verification(s3_storage): - uri = f"s3s://{s3_storage.factory.host}:{s3_storage.bucket}?access={s3_storage.key.id}&secret={s3_storage.key.secret}&ssl=False" - if s3_storage.factory.port: - uri += f"&port={s3_storage.factory.port}" + assert storage.factory.client_cert_file + uri += f"{delimiter}CA_cert_path={storage.factory.client_cert_file}" + if 
client_cert_dir == ParameterDisplayStatus.DISABLE: + uri += f"{delimiter}CA_cert_dir=" + elif client_cert_dir == ParameterDisplayStatus.ENABLE: + assert storage.factory.client_cert_dir + uri += f"{delimiter}CA_cert_dir={storage.factory.client_cert_dir}" ac = Arctic(uri) lib = ac.create_library("test") lib.write("sym", pd.DataFrame()) @@ -909,14 +919,6 @@ def test_get_uri(fixture, request): assert ac.get_uri() == storage_fixture.arctic_uri -@AZURE_TESTS_MARK # GH issue #1060 -def test_azure_no_ca_path(azurite_storage: StorageFixture): - uri = azurite_storage.replace_uri_field(azurite_storage.arctic_uri, ArcticUriFields.CA_PATH, "", start=1, end=3) - assert "CA_cert_path" not in uri - ac = Arctic(uri.rstrip(";")) - ac.create_library("x") - - @AZURE_TESTS_MARK def test_azure_sas_token(azurite_storage_factory: StorageFixtureFactory): with azurite_storage_factory.enforcing_permissions_context(): diff --git a/python/tests/scripts/test_update_storage.py b/python/tests/scripts/test_update_storage.py index 919a6bb9d03..d97c9744059 100644 --- a/python/tests/scripts/test_update_storage.py +++ b/python/tests/scripts/test_update_storage.py @@ -12,7 +12,7 @@ from arcticdb.storage_fixtures.azure import AzureContainer from arcticdb.storage_fixtures.s3 import S3Bucket from arcticdb.adapters.s3_library_adapter import USE_AWS_CRED_PROVIDERS_TOKEN -from tests.util.mark import AZURE_TESTS_MARK +from tests.util.mark import AZURE_TESTS_MARK, SSL_TEST_ENABLED LIB_NAME = "test_lib" @@ -105,6 +105,9 @@ def test_upgrade_script_s3_rbac_ok(s3_storage: S3Bucket, monkeypatch): @AZURE_TESTS_MARK def test_upgrade_script_dryrun_azure(azurite_storage: AzureContainer): # Given + if SSL_TEST_ENABLED: + # azurite factory doesn't set client_cert_dir by default + azurite_storage.arctic_uri += f";CA_cert_dir={azurite_storage.factory.client_cert_dir}" ac = azurite_storage.create_arctic() create_library_config(ac, LIB_NAME) @@ -114,7 +117,8 @@ def test_upgrade_script_dryrun_azure(azurite_storage: 
AzureContainer): # Then config = ac._library_manager.get_library_config(LIB_NAME) azure_storage = _get_azure_storage_config(config) - assert azure_storage.ca_cert_path == azurite_storage.factory.ca_cert_path + assert azure_storage.ca_cert_path == azurite_storage.factory.client_cert_file + assert azure_storage.ca_cert_dir == azurite_storage.factory.client_cert_dir assert azure_storage.container_name == azurite_storage.container assert azurite_storage.factory.account_name in azure_storage.endpoint assert azurite_storage.factory.account_key in azure_storage.endpoint @@ -125,6 +129,9 @@ def test_upgrade_script_dryrun_azure(azurite_storage: AzureContainer): @AZURE_TESTS_MARK def test_upgrade_script_azure(azurite_storage: AzureContainer): # Given + if SSL_TEST_ENABLED: + # azurite factory doesn't set client_cert_dir by default + azurite_storage.arctic_uri += f";CA_cert_dir={azurite_storage.factory.client_cert_dir}" ac = azurite_storage.create_arctic() create_library_config(ac, LIB_NAME) @@ -133,6 +140,7 @@ def test_upgrade_script_azure(azurite_storage: AzureContainer): config = ac._library_manager.get_library_config(LIB_NAME) azure_storage = _get_azure_storage_config(config) assert azure_storage.ca_cert_path == "" + assert azure_storage.ca_cert_dir == "" assert azure_storage.container_name == "" assert azure_storage.endpoint == "" assert azure_storage.prefix.startswith(LIB_NAME) diff --git a/python/tests/util/mark.py b/python/tests/util/mark.py index 4b2da35e3ec..fa711861ae4 100644 --- a/python/tests/util/mark.py +++ b/python/tests/util/mark.py @@ -51,12 +51,12 @@ """Windows and MacOS have different handling of self-signed CA cert for test. 
TODO: https://github.com/man-group/ArcticDB/issues/1394""" -S3_SSL_TEST_ENABLED = sys.platform == "linux" +SSL_TEST_ENABLED = sys.platform == "linux" -S3_SSL_TESTS_MARK = pytest.mark.skipif( - not S3_SSL_TEST_ENABLED, - reason="Additional S3 test only when SSL is ON", +SSL_TESTS_MARK = pytest.mark.skipif( + not SSL_TEST_ENABLED, + reason="When SSL tests are enabled", )