Skip to content

Commit

Permalink
Read certs from node attribute for now.
Browse files Browse the repository at this point in the history
  • Loading branch information
skottmckay committed Aug 22, 2023
1 parent 9b55386 commit 98b6a94
Show file tree
Hide file tree
Showing 12 changed files with 136 additions and 6,368 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ if(OCOS_ENABLE_AZURE)
set(zlib_LIB_NAME "zlib")
target_link_directories(ocos_operators PUBLIC ${VCPKG_SRC}/installed/${vcpkg_triplet}/lib)
endif()
target_link_libraries(ocos_operators PUBLIC httpclient_static ${curl_LIB_NAME} ${zlib_LIB_NAME} ws2_32 crypt32 Wldap32)
target_link_libraries(ocos_operators PUBLIC httpclient_static ${curl_LIB_NAME} libcrypto libssl ${zlib_LIB_NAME} ws2_32 crypt32 Wldap32)
else()
find_package(ZLIB REQUIRED)

Expand Down Expand Up @@ -768,7 +768,7 @@ if(_BUILD_SHARED_LIBRARY)
file(GLOB shared_TARGET_SRC "shared/*.cc" "shared/*.h" "shared/*.def")
add_library(extensions_shared SHARED ${shared_TARGET_SRC})

# We need to propagate OCOS_SHARED_LIBRARY if set.
# We need to propagate OCOS_SHARED_LIBRARY if set.
# could specifically add that if using OCOS_COMPILE_DEFINITIONS is too much.
target_compile_definitions(extensions_shared PRIVATE ${OCOS_COMPILE_DEFINITIONS})

Expand Down
3,363 changes: 0 additions & 3,363 deletions operators/azure/curl_certs/cacert-2023-05-30.pem

This file was deleted.

1 change: 0 additions & 1 deletion operators/azure/curl_certs/cacert-2023-05-30.pem.sha256

This file was deleted.

2,935 changes: 0 additions & 2,935 deletions operators/azure/curl_certs/cacert.pem.inc

This file was deleted.

43 changes: 0 additions & 43 deletions operators/azure/curl_certs/create_inc_file_with_certs.py

This file was deleted.

14 changes: 0 additions & 14 deletions operators/azure/curl_certs/readme.md

This file was deleted.

29 changes: 21 additions & 8 deletions operators/azure/curl_invoker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ namespace {
// need to do in memory cert on Android pending finding a way to use the system certs.
#if defined(USE_IN_MEMORY_CURL_CERTS)
// based on the approach from https://curl.se/libcurl/c/cacertinmem.html
X509_STORE* CreateX509Store() {
// the #include defines `static const char curl_pem[] = ...;` with the certs
#include "curl_certs/cacert.pem.inc"
X509_STORE* CreateX509Store(const std::string& certs) {
bool success = false;
X509_STORE* cts = X509_STORE_new();
if (!cts) {
Expand All @@ -36,7 +34,7 @@ X509_STORE* CreateX509Store() {
}
});

BIO* cbio = BIO_new_mem_buf(curl_pem, sizeof(curl_pem));
BIO* cbio = BIO_new_mem_buf(certs.data(), certs.length());
if (!cbio) {
ORTX_CXX_API_THROW("BIO_new_mem_buf returned nullptr", ORT_RUNTIME_EXCEPTION);
}
Expand Down Expand Up @@ -71,15 +69,16 @@ X509_STORE* CreateX509Store() {
return cts;
}

X509_STORE* GetCertificateStore() {
static std::unique_ptr<X509_STORE, decltype(&X509_STORE_free)> store{CreateX509Store(), &X509_STORE_free};
// Returns the shared X509_STORE that is handed to the SSL context for certificate verification.
// `store` is a function-local static, so CreateX509Store(certs) only runs when that static is initialized;
// on later calls `certs` is ignored and the already-created store is returned.
// The returned pointer stays owned by the static unique_ptr (released with X509_STORE_free), so callers
// must not free it.
X509_STORE* GetCertificateStore(const std::string& certs) {
// first call populates the store. `certs` is ignored after that.
static std::unique_ptr<X509_STORE, decltype(&X509_STORE_free)> store{CreateX509Store(certs), &X509_STORE_free};

return store.get();
}

CURLcode sslctx_function(CURL* /*curl*/, void* sslctx, void* /*parm*/) {
// Need to use SSL_CTX_set1_cert_store so the ref count on the store gets incremented correctly.
SSL_CTX_set1_cert_store(static_cast<SSL_CTX*>(sslctx), GetCertificateStore());
// Need to use SSL_CTX_set1_cert_store so the ref count on the store gets incremented correctly.
SSL_CTX_set1_cert_store(static_cast<SSL_CTX*>(sslctx), GetCertificateStore(""));

return CURLE_OK;
}
Expand Down Expand Up @@ -135,6 +134,20 @@ CurlHandler::CurlHandler() : curl_(curl_easy_init(), curl_easy_cleanup),

// Constructs the kernel, forwarding `api` and `info` to CloudBaseKernel.
// When USE_IN_MEMORY_CURL_CERTS is defined it also reads the kX509Certificates node attribute and, if a
// non-empty value is present, passes it to GetCertificateStore to populate the in-memory certificate store.
CurlInvoker::CurlInvoker(const OrtApi& api, const OrtKernelInfo& info)
: CloudBaseKernel(api, info) {
#if defined(USE_IN_MEMORY_CURL_CERTS)
std::string x509_certs;
// Only use the attribute if it is both present and non-empty.
if (TryToGetAttribute(kX509Certificates, x509_certs) && !x509_certs.empty()) {
// populate certificate store. the returned pointer is not needed here, hence the cast to void.
static_cast<void>(GetCertificateStore(x509_certs));
} else {
// attribute not present or empty. there could be other Azure operator nodes in the model though and we only need
// one to provide the certs, so this is logged as a warning rather than treated as an error.
KERNEL_LOG(GetLogger(), ORT_LOGGING_LEVEL_WARNING,
(std::string(kX509Certificates) +
" attribute is required on Android from at least one Azure custom operator in the model")
.c_str());
}
#endif
}

void CurlInvoker::ComputeImpl(const ortc::Variadic& inputs, ortc::Variadic& outputs) const {
Expand Down
7 changes: 7 additions & 0 deletions operators/azure/curl_invoker.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ class CurlInvoker : public CloudBaseKernel {
CurlInvoker(const OrtApi& api, const OrtKernelInfo& info);
virtual ~CurlInvoker() = default;

// attribute name containing x509 certificates for use in SSL requests. required on Android.
// should only contain the certs, i.e. sequential blocks of
// "-----BEGIN CERTIFICATE-----", certificate contents, "-----END CERTIFICATE-----"
// with no comments or other text in between.
// see /test/data/azure/get_certs_for_model.py for example processing of a PEM file to extract the certificates.
static constexpr const char* kX509Certificates = "x509_certificates";

// Compute implementation that is used to co-ordinate all Curl based Azure requests.
// Derived classes need their own Compute to work with the CustomOpLite infrastructure
void ComputeImpl(const ortc::Variadic& inputs, ortc::Variadic& outputs) const;
Expand Down
Empty file added test/data/azure/__init__.py
Empty file.
10 changes: 8 additions & 2 deletions test/data/azure/create_openai_whisper_transcriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from onnx import helper, numpy_helper, TensorProto

import onnx
import numpy as np
import sys

from get_certs_for_model import get_certs_from_url
from onnx import helper, numpy_helper, TensorProto

# ORT 1.14 only supports IR version 8 so if we're unit testing with the oldest version of ORT that can be used
# with the Azure ops we need to use this version instead of onnx.IR_VERSION
MODEL_IR_VERSION = 8
Expand All @@ -33,6 +34,10 @@ def make_graph(*args, doc_string=None, **kwargs):
return graph


# need to include the certs for curl+openssl on Android in the model as a node attribute
x509_certs = get_certs_from_url("https://curl.se/ca/cacert.pem")
assert x509_certs

# This creates a model that allows the prompt and filename to be optionally provided as inputs.
# The filename can be specified to indicate a different audio type to the default value in the audio_format attribute.
model = helper.make_model(
Expand Down Expand Up @@ -66,6 +71,7 @@ def make_graph(*args, doc_string=None, **kwargs):
audio_format='wav', # default audio type if filename is not specified.
model_uri='https://api.openai.com/v1/audio/transcriptions',
model_name='whisper-1',
x509_certificates=x509_certs,
verbose=0,
),
],
Expand Down
98 changes: 98 additions & 0 deletions test/data/azure/get_certs_for_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Curl + openssl has issues reading the system certificates on Android.
# Pending a better solution we create an in-memory certificate store from certificates included in the model.
#
# The certificates must be added to the first Azure operator in the model in an attribute called 'x509_certificates'.
# The user must determine the correct certificates for their scenario, and add them to the model.
# The PEM file from https://curl.se/docs/caextract.html may be used.
#
# Include this file in the python script that is creating your model with Azure custom operators.
# Get the value to use in the 'x509_certificates' attribute from either a file (call get_certs_from_file) or
# a url (call get_certs_from_url)
#
# See create_openai_whisper_transcriptions.py for example usage.
#
# Notes:
#
# - Supposedly if openssl uses md5 hashing for the certificates in /system/etc/security/cacerts it should work, but
# a patched version of openssl with this change still failed.
# - The 'better' solution might be to use boringssl instead of openssl as it handles the certificate format in
# /system/etc/security/cacerts, although even that is potentially problematic as there's no versioning of boringssl.

import pathlib
import tempfile


def _get_certs_from_input(input_data):
certs = None

# strip out everything except the certs as per https://curl.se/libcurl/c/cacertinmem.html example.
with tempfile.TemporaryFile(mode='w+', encoding='utf-8') as out:
in_cert = False
num_certs = 0
for line in input_data.readlines():
if not in_cert:
in_cert = "-----BEGIN CERTIFICATE-----" in line
if in_cert:
num_certs += 1

if in_cert:
out.write(line)
in_cert = "-----END CERTIFICATE-----" not in line

assert num_certs > 0
assert not in_cert # mismatched begin/end if not false
print(f"Processed {num_certs} certificates")

# rewind and return as UTF-8 string
out.seek(0)
certs = out.read()

return certs


def get_certs_from_url(url: str):
    """
    Download a PEM file and return just its certificates as a UTF-8 string, ready to be stored in a node
    attribute of an Azure custom operator.
    e.g. https://curl.se/ca/cacert.pem
    :param url: URL that returns the PEM file contents
    :return: UTF-8 string containing the certificates
    """
    import urllib.request

    # fetch the whole response and decode it before doing any filtering
    with urllib.request.urlopen(url) as response:
        pem_text = response.read().decode('utf-8')

    # the filtering helper reads from a text-mode file object, so stage the text in a temporary file
    with tempfile.TemporaryFile(mode="w+", encoding='utf-8') as pem_file:
        pem_file.write(pem_text)
        pem_file.seek(0)
        return _get_certs_from_input(pem_file)


def get_certs_from_file(pem_filename: pathlib.Path):
    """
    Read the contents of a PEM file and return the certificates as a UTF-8 string for inclusion as a node attribute
    of an Azure custom operator.
    :param pem_filename: path to the PEM file. A plain string path is accepted as well as a pathlib.Path.
    :return: UTF-8 string containing the certificates
    :raises FileNotFoundError: if the path does not exist
    """

    certs = None
    # wrap in Path so string paths work too; strict=True fails early with FileNotFoundError for a bad path
    pem_filename = pathlib.Path(pem_filename).resolve(strict=True)
    # read explicitly as UTF-8 rather than relying on the platform default encoding
    with open(pem_filename, encoding='utf-8') as input_data:
        certs = _get_certs_from_input(input_data)

    return certs


# examples for testing
# if __name__ == "__main__":
# a = get_certs_from_file(pathlib.Path("cacert.pem"))
# b = get_certs_from_url("https://curl.se/ca/cacert.pem")
# assert(a == b)
Binary file modified test/data/azure/openai_whisper_transcriptions.onnx
Binary file not shown.

0 comments on commit 98b6a94

Please sign in to comment.