Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support tokenizers build only in C API mode #783

Merged
merged 5 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .pipelines/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,13 @@ stages:
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests

# CI step: configure and build with the token_api_only preset and the shared
# library disabled, then run the ctest suite from the out/Linux build directory.
- bash: |
set -e -x -u
./build.sh -DOCOS_BUILD_PRESET=token_api_only -DOCOS_BUILD_SHARED_LIB=OFF
cd out/Linux
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with tokenizer API only enabled and run tests


- stage: MacOSBuilds
dependsOn: []
Expand Down
43 changes: 32 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ option(OCOS_ENABLE_STATIC_LIB "Enable generating static library" OFF)
option(OCOS_ENABLE_SELECTED_OPLIST "Enable including the selected_ops tool file" OFF)
option(OCOS_ENABLE_C_API "Enable building the C API" OFF)

option(OCOS_BUILD_SHARED_LIB "Enable building the dynamic library" ON)
option(OCOS_BUILD_PYTHON "Enable building the Python package" OFF)
option(OCOS_BUILD_JAVA "Enable building the Java package" OFF)
option(OCOS_BUILD_ANDROID "Enable building the Android package" OFF)
Expand Down Expand Up @@ -698,7 +699,7 @@ endif()

# If building a shared library we can't throw an internal exception type across the library boundary as the type
# will be unknown. Set a compile definition so the code can adjust to the build type.
if(NOT OCOS_ENABLE_STATIC_LIB)
if(OCOS_BUILD_SHARED_LIB)
list(APPEND OCOS_COMPILE_DEFINITIONS OCOS_SHARED_LIBRARY)
endif()

Expand All @@ -724,15 +725,32 @@ list(APPEND ocos_libraries noexcep_operators)
target_compile_definitions(ocos_operators PRIVATE ${OCOS_COMPILE_DEFINITIONS})
target_link_libraries(ocos_operators PRIVATE ${ocos_libraries})

set (file_patterns "shared/lib/*.cc")
if (OCOS_ENABLE_C_API)
list(APPEND file_patterns "shared/api/*.h*" "shared/api/*.c" "shared/api/*.cc")
# Collect the source files into _TARGET_LIB_SRC.
# The shared/lib sources are always included; the C API sources under shared/api
# are added per feature, so only the enabled pieces are picked up.
file(GLOB _TARGET_LIB_SRC "shared/lib/*.cc")
if(OCOS_ENABLE_C_API)
# C API utility sources, added whenever the C API is enabled.
file(GLOB utils_TARGET_SRC "shared/api/c_api_utils.*" "shared/api/runner.hpp")
list(APPEND _TARGET_LIB_SRC ${utils_TARGET_SRC})
# Tokenizer C API sources, only when _HAS_TOKENIZER is set.
if(_HAS_TOKENIZER)
file(GLOB tok_TARGET_SRC "shared/api/c_api_tokenizer.cc" "shared/api/token*")
list(APPEND _TARGET_LIB_SRC ${tok_TARGET_SRC})
endif()
# Feature-extraction / speech C API sources, only when audio support is enabled.
if(OCOS_ENABLE_AUDIO)
file(GLOB audio_TARGET_SRC "shared/api/c_api_feature_extraction.*" "shared/api/speech_*")
list(APPEND _TARGET_LIB_SRC ${audio_TARGET_SRC})
endif()
# Processor / image C API sources, only when CV2 support is enabled.
if(OCOS_ENABLE_CV2)
file(GLOB cv2_TARGET_SRC "shared/api/c_api_processor.*" "shared/api/image_*.*")
list(APPEND _TARGET_LIB_SRC ${cv2_TARGET_SRC})
endif()
endif()

file(GLOB shared_TARGET_LIB_SRC ${file_patterns})

if(NOT OCOS_ENABLE_STATIC_LIB AND CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
add_executable(ortcustomops ${shared_TARGET_LIB_SRC})
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if(OCOS_ENABLE_STATIC_LIB)
message(FATAL_ERROR "Emscripten build does not support building a static library.")
endif()
# Emscripten does not support building a shared library with custom ops.
# To stay backward compatible with the previous version, we silently turn off the shared library build.
set(OCOS_BUILD_SHARED_LIB OFF CACHE INTERNAL "" FORCE)
add_executable(ortcustomops ${_TARGET_LIB_SRC})
set_target_properties(ortcustomops PROPERTIES LINK_FLAGS " \
-s WASM=1 \
-s NO_EXIT_RUNTIME=0 \
Expand All @@ -751,13 +769,12 @@ if(NOT OCOS_ENABLE_STATIC_LIB AND CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set_property(TARGET ortcustomops APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=0 -s DEMANGLE_SUPPORT=0")
endif()
else()
add_library(ortcustomops STATIC ${shared_TARGET_LIB_SRC})
add_library(ortcustomops STATIC ${_TARGET_LIB_SRC})
if (HAS_SDL)
target_compile_options(ortcustomops PRIVATE "/sdl")
endif()
add_library(onnxruntime_extensions ALIAS ortcustomops)
standardize_output_folder(ortcustomops)
set(_BUILD_SHARED_LIBRARY TRUE)
endif()
set_target_properties(ortcustomops PROPERTIES FOLDER "operators")

Expand Down Expand Up @@ -832,9 +849,12 @@ target_include_directories(ortcustomops PUBLIC "$<TARGET_PROPERTY:ocos_operators

target_link_libraries(ortcustomops PUBLIC ocos_operators)

if(_BUILD_SHARED_LIBRARY)
if(OCOS_BUILD_SHARED_LIB)
file(GLOB shared_TARGET_SRC "shared/*.cc" "shared/*.h")
if (OCOS_ENABLE_C_API)
if (NOT _HAS_TOKENIZER OR NOT OCOS_ENABLE_CV2 OR NOT OCOS_ENABLE_AUDIO)
message(FATAL_ERROR "Shared library build requires GPT2_TOKENIZER, CV2 and AUDIO to be enabled.")
endif()
list(APPEND shared_TARGET_SRC "shared/extensions_c.def")
else()
list(APPEND shared_TARGET_SRC "shared/ortcustomops.def")
Expand Down Expand Up @@ -885,6 +905,7 @@ endif()

# Python package build: report that it is enabled, expose the collected library
# sources under the shared_TARGET_LIB_SRC name, then include the ext_python module.
if(OCOS_BUILD_PYTHON)
message(STATUS "Python Build is enabled")
set(shared_TARGET_LIB_SRC ${_TARGET_LIB_SRC}) # these library file are also needed for python build
include(ext_python)
endif()

Expand Down
142 changes: 72 additions & 70 deletions cmake/ext_tests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ add_test_target(TARGET ocos_test
LIBRARIES ortcustomops ${ocos_libraries})
target_compile_definitions(ocos_test PRIVATE ${OCOS_COMPILE_DEFINITIONS})

if (OCOS_ENABLE_C_API)
if (OCOS_ENABLE_C_API AND OCOS_BUILD_SHARED_LIB)
file(GLOB pp_api_TEST_SRC
"${TEST_SRC_DIR}/pp_api_test/*.c"
"${TEST_SRC_DIR}/pp_api_test/*.cc"
Expand Down Expand Up @@ -163,73 +163,75 @@ else()
find_library(ONNXRUNTIME onnxruntime HINTS "${ONNXRUNTIME_LIB_DIR}")
endif()

if("${ONNXRUNTIME}" STREQUAL "ONNXRUNTIME-NOTFOUND")
message(WARNING "The prebuilt onnxruntime library was not found, extensions_test will be skipped.")
else()
block()
if(NOT IOS)
set(use_extensions_shared_library 1)
endif()

set(extensions_target $<IF:$<BOOL:${use_extensions_shared_library}>,extensions_shared,ortcustomops>)

file(GLOB shared_TEST_SRC
"${TEST_SRC_DIR}/shared_test/*.cc"
"${TEST_SRC_DIR}/shared_test/*.hpp")

set(extensions_test_libraries ${extensions_target} ${ONNXRUNTIME})

if(use_extensions_shared_library)
list(APPEND extensions_test_libraries ${ocos_libraries})
endif()

# needs to link with stdc++fs in Linux
if(LINUX)
list(APPEND extensions_test_libraries stdc++fs -pthread)
endif()

add_test_target(TARGET extensions_test
TEST_SOURCES ${shared_TEST_SRC}
LIBRARIES ${extensions_test_libraries}
TEST_DATA_DIRECTORIES ${TEST_SRC_DIR}/data)

target_include_directories(extensions_test PRIVATE ${spm_INCLUDE_DIRS})

target_compile_definitions(extensions_test PUBLIC ${OCOS_COMPILE_DEFINITIONS})
if(use_extensions_shared_library)
target_compile_definitions(extensions_test PUBLIC ORT_EXTENSIONS_UNIT_TEST_USE_EXTENSIONS_SHARED_LIBRARY)
endif()

# FUTURE: This is required to use the ORT C++ API with delayed init which must be done conditionally using
# ifdef OCOS_BUILD_SHARED in RegisterCustomOps and where onnxruntime_cxx_api.h is included .
# ---
# We have to remove the OCOS_BUILD_SHARED when building the test code. It is used to delay population of the
# ORT api pointer until RegisterCustomOps is called, but the test code needs to create an ORT env which requires
# the pointer to exist.
# set(test_compile_definitions ${OCOS_COMPILE_DEFINITIONS})
# remove(test_compile_definitions "OCOS_SHARED_LIBRARY")
# target_compile_definitions(extensions_test PUBLIC ${test_compile_definitions})

# Copy onnxruntime DLL files into the same directory as the test binary.
if(WIN32)
file(TO_CMAKE_PATH "${ONNXRUNTIME_LIB_DIR}/*" ONNXRUNTIME_LIB_FILEPATTERN)
file(GLOB ONNXRUNTIME_LIB_FILES CONFIGURE_DEPENDS "${ONNXRUNTIME_LIB_FILEPATTERN}")
add_custom_command(
TARGET extensions_test POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ONNXRUNTIME_LIB_FILES} $<TARGET_FILE_DIR:extensions_test>)
endif()

# Copy onnxruntime shared library to known location for easy access, e.g., for adb push to emulator or device.
if(ANDROID)
add_custom_command(
TARGET extensions_test POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ONNXRUNTIME} ${CMAKE_BINARY_DIR}/lib
)
endif()

if (OCOS_ENABLE_C_API)
# avoid copying the same data directory at the same time.
add_dependencies(extensions_test pp_api_test)
endif()
endblock()
# Set up the extensions_test target. It is only created when the shared library
# build is enabled and a prebuilt onnxruntime library was found; otherwise a
# warning is printed (library missing) or nothing is done (shared build off).
if (OCOS_BUILD_SHARED_LIB)
if("${ONNXRUNTIME}" STREQUAL "ONNXRUNTIME-NOTFOUND")
message(WARNING "The prebuilt onnxruntime library was not found, extensions_test will be skipped.")
else()
# block() keeps the helper variables set below local to this section.
block()
# Link the tests against the shared library on every platform except iOS.
if(NOT IOS)
set(use_extensions_shared_library 1)
endif()

# Resolves to extensions_shared when use_extensions_shared_library is set, otherwise to ortcustomops.
set(extensions_target $<IF:$<BOOL:${use_extensions_shared_library}>,extensions_shared,ortcustomops>)

file(GLOB shared_TEST_SRC
"${TEST_SRC_DIR}/shared_test/*.cc"
"${TEST_SRC_DIR}/shared_test/*.hpp")

set(extensions_test_libraries ${extensions_target} ${ONNXRUNTIME})

# When linking the shared library, the ocos_libraries are added to the test's link list as well.
if(use_extensions_shared_library)
list(APPEND extensions_test_libraries ${ocos_libraries})
endif()

# needs to link with stdc++fs in Linux
if(LINUX)
list(APPEND extensions_test_libraries stdc++fs -pthread)
endif()

add_test_target(TARGET extensions_test
TEST_SOURCES ${shared_TEST_SRC}
LIBRARIES ${extensions_test_libraries}
TEST_DATA_DIRECTORIES ${TEST_SRC_DIR}/data)

target_include_directories(extensions_test PRIVATE ${spm_INCLUDE_DIRS})

target_compile_definitions(extensions_test PUBLIC ${OCOS_COMPILE_DEFINITIONS})
if(use_extensions_shared_library)
target_compile_definitions(extensions_test PUBLIC ORT_EXTENSIONS_UNIT_TEST_USE_EXTENSIONS_SHARED_LIBRARY)
endif()

# FUTURE: This is required to use the ORT C++ API with delayed init which must be done conditionally using
# ifdef OCOS_BUILD_SHARED in RegisterCustomOps and where onnxruntime_cxx_api.h is included .
# ---
# We have to remove the OCOS_BUILD_SHARED when building the test code. It is used to delay population of the
# ORT api pointer until RegisterCustomOps is called, but the test code needs to create an ORT env which requires
# the pointer to exist.
# set(test_compile_definitions ${OCOS_COMPILE_DEFINITIONS})
# remove(test_compile_definitions "OCOS_SHARED_LIBRARY")
# target_compile_definitions(extensions_test PUBLIC ${test_compile_definitions})

# Copy onnxruntime DLL files into the same directory as the test binary.
if(WIN32)
file(TO_CMAKE_PATH "${ONNXRUNTIME_LIB_DIR}/*" ONNXRUNTIME_LIB_FILEPATTERN)
file(GLOB ONNXRUNTIME_LIB_FILES CONFIGURE_DEPENDS "${ONNXRUNTIME_LIB_FILEPATTERN}")
add_custom_command(
TARGET extensions_test POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ONNXRUNTIME_LIB_FILES} $<TARGET_FILE_DIR:extensions_test>)
endif()

# Copy onnxruntime shared library to known location for easy access, e.g., for adb push to emulator or device.
if(ANDROID)
add_custom_command(
TARGET extensions_test POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ONNXRUNTIME} ${CMAKE_BINARY_DIR}/lib
)
endif()

if (OCOS_ENABLE_C_API)
# avoid copying the same data directory at the same time.
add_dependencies(extensions_test pp_api_test)
endif()
endblock()
endif()
endif()
2 changes: 2 additions & 0 deletions docs/c_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ Most APIs accept raw data inputs such as audio, image compressed binary formats,
**Image processing:** `OrtxCreateProcessor` can create an image processor object from a pre-defined workflow in JSON format to process image files into a tensor-like data type. An example code snippet can be found [here](../test/pp_api_test/test_processor.cc#L75).

**Audio feature extraction:** `OrtxCreateSpeechFeatureExtractor` creates a speech feature extractor to obtain log mel spectrum data as input for the Whisper model. An example code snippet can be found [here](../test/pp_api_test/test_feature_extraction.cc#L16).

NB: Building onnxruntime-extensions as a shared library requires the `OCOS_ENABLE_AUDIO`, `OCOS_ENABLE_CV2`, `OCOS_ENABLE_OPENCV_CODECS`, and `OCOS_ENABLE_GPT2_TOKENIZER` build flags to be ON so that the resulting binary is fully functional. Only the onnxruntime-extensions static library can be used for a minimal build with selected operators; in that case, the shared library build can be switched off with `-DOCOS_BUILD_SHARED_LIB=OFF`.
4 changes: 4 additions & 0 deletions test/static_test/test_tokenizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ class LocaleBaseTest : public testing::Test {
std::string default_locale_;
};

#if defined(ENABLE_WORDPIECE_TOKENIZER) && defined(ENABLE_BERT_TOKENIZER)

TEST(tokenizer, bert_word_split) {
ustring ind("##");
ustring text("A AAA B BB");
Expand Down Expand Up @@ -261,3 +263,5 @@ TEST(tokenizer, basic_tok_eager) {
tokenizer.Compute(test_case, output);
EXPECT_EQ(output.Data(), expect_result);
}

#endif
Loading