Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[onnx,onnxruntime] new port for v1.19.2 with onnx 1.16.0 #36850

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
41a5b63
[onnx] update to v1.16.0
luncliff Jul 5, 2024
e27a0d1
[onnx-optimizer] provide ONNX_TARGET_NAME
luncliff Jul 5, 2024
10d5405
[openvino] support onnx 1.16.0
luncliff Aug 25, 2024
02635ad
[onnxruntime] create port with v1.18.0
luncliff Aug 25, 2024
5c945e4
[onnxruntime] support TensorRT build
luncliff Aug 25, 2024
72177b0
[libtorch] support onnx v1.16.0
luncliff Aug 25, 2024
4e24728
[onnxruntime-gpu] use onnxruntime[cuda], without TensorRT
luncliff Aug 15, 2024
4e4e44e
[onnxruntime] update to v1.19.0
luncliff Sep 13, 2024
7d9da66
[onnxruntime] suppress NVCC warnings from Microsoft.GSL attribute
luncliff Sep 13, 2024
28302ed
[onnxruntime] back to v1.18.0
luncliff Sep 14, 2024
92ea700
Merge branch 'master' into port/onnxruntime
luncliff Sep 14, 2024
5408cb3
[libtorch] fix port version after merge
luncliff Sep 14, 2024
942b310
[onnxruntime] update baseline
luncliff Sep 14, 2024
1fc4cc0
[onnxruntime] remove safeint_interface in PUBLIC
luncliff Sep 14, 2024
47b8289
[onnxruntime] remove nested eigen sources
luncliff Sep 16, 2024
e8cc89a
[onnxruntime] bump to 1.19.0 again
luncliff Sep 16, 2024
74d7adf
[onnxruntime] create revert patch, PR 21492
luncliff Sep 16, 2024
6bc3ffc
[onnxruntime] bump to v1.19.2
luncliff Oct 1, 2024
ca1dba4
Merge branch 'master' into port/onnxruntime
luncliff Oct 1, 2024
aeaa20b
[openvino] support onnx 1.16.0
luncliff Oct 1, 2024
be8880a
[onnxruntime-gpu] update port-version, baseline
luncliff Oct 2, 2024
b1a1c39
x64-windows skips onnxruntime-gpu
luncliff Oct 9, 2024
4abfe13
Merge branch 'master' into port/onnxruntime
luncliff Oct 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions ports/libtorch/fix-build.patch
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,15 @@ index b46a444..255d7f4 100644
- endif()
- set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
- message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
- list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
+ find_package(ONNX REQUIRED)
+ set(ONNX_LIBRARY onnx)
+ set(ONNX_PROTO_LIBRARY onnx_proto)
+ set(ONNX_LIBRARY ONNX::onnx)
+ set(ONNX_PROTO_LIBRARY ONNX::onnx_proto)
+ message("-- Found onnx")
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
+ list(APPEND Caffe2_DEPENDENCY_LIBS ONNX::onnx_proto ONNX::onnx)
endif()
include_directories(${FOXI_INCLUDE_DIRS})
list(APPEND Caffe2_DEPENDENCY_LIBS "${FOXI_LOADER_LIBPATH}")
diff --git a/pyproject.toml b/pyproject.toml
index eb764cb..c70f317 100644
--- a/pyproject.toml
Expand Down
2 changes: 1 addition & 1 deletion ports/libtorch/vcpkg.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "libtorch",
"version": "2.1.2",
"port-version": 6,
"port-version": 7,
"description": "Tensors and Dynamic neural networks in Python with strong GPU acceleration",
"homepage": "https://pytorch.org/",
"license": null,
Expand Down
1 change: 1 addition & 0 deletions ports/onnx-optimizer/portfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ vcpkg_cmake_configure(
OPTIONS
${FEATURE_OPTIONS}
-DONNX_USE_MSVC_STATIC_RUNTIME=${USE_STATIC_RUNTIME}
-DONNX_TARGET_NAME=ONNX::onnx # after 1.16.0
)
if("pybind11" IN_LIST FEATURES)
# This target is not in install/export
Expand Down
1 change: 1 addition & 0 deletions ports/onnx-optimizer/vcpkg.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"name": "onnx-optimizer",
"version-semver": "0.3.18",
"port-version": 1,
"description": "Actively maintained ONNX Optimizer",
"homepage": "https://github.com/onnx/optimizer",
"license": "Apache-2.0",
Expand Down
67 changes: 28 additions & 39 deletions ports/onnx/fix-cmakelists.patch
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4dd56b6..2ff3e29 100644
index 6d7ca84..b72646f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,6 +65,27 @@ endif()
@@ -129,6 +129,7 @@ if(ONNX_BUILD_TESTS)
endif()
endif()

include(GNUInstallDirs)
+find_package(protobuf CONFIG REQUIRED)
if((ONNX_USE_LITE_PROTO AND TARGET protobuf::libprotobuf-lite) OR ((NOT ONNX_USE_LITE_PROTO) AND TARGET protobuf::libprotobuf))
# Sometimes we need to use protoc compiled for host architecture while linking
# libprotobuf against target architecture. See https://github.com/caffe2/caffe
@@ -711,6 +712,27 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/onnx
FILES_MATCHING
PATTERN "*.h")

+# install protobuf files
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/onnx/onnx-data.proto
Expand All @@ -27,41 +35,22 @@ index 4dd56b6..2ff3e29 100644
+ )
+endif()
+
set(ONNX_ROOT ${PROJECT_SOURCE_DIR})

# Read ONNX version
@@ -116,7 +137,8 @@ endif()
# find_package Python has replaced PythonInterp and PythonLibs since cmake 3.12
# Use the following command in the future; now this is only compatible with the latest pybind11
# find_package(Python ${PY_VERSION} COMPONENTS Interpreter Development REQUIRED)
-find_package(PythonInterp ${PY_VERSION} REQUIRED)
+find_package(Python3 ${PY_VERSION} COMPONENTS Interpreter REQUIRED)
+set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE})
if(BUILD_ONNX_PYTHON)
find_package(PythonLibs ${PY_VERSION})
endif()
@@ -434,6 +456,7 @@ target_link_libraries(onnx PUBLIC onnx_proto)
add_onnx_global_defines(onnx)
configure_file(
${PROJECT_SOURCE_DIR}/cmake/ONNXConfigVersion.cmake.in
${PROJECT_BINARY_DIR}/ONNXConfigVersion.cmake
diff --git a/cmake/ONNXConfig.cmake.in b/cmake/ONNXConfig.cmake.in
index d588f8a..dbd4398 100644
--- a/cmake/ONNXConfig.cmake.in
+++ b/cmake/ONNXConfig.cmake.in
@@ -6,9 +6,8 @@
# library version information
set(ONNX_VERSION "@ONNX_VERSION@")

if(BUILD_ONNX_PYTHON)
+ find_package(Python3 ${PY_VERSION} COMPONENTS Development REQUIRED)
if("${PY_EXT_SUFFIX}" STREQUAL "")
if(MSVC)
set(PY_EXT_SUFFIX ".pyd")
@@ -452,10 +475,14 @@ if(BUILD_ONNX_PYTHON)
target_include_directories(onnx_cpp2py_export PRIVATE
$<BUILD_INTERFACE:${ONNX_ROOT}>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
- $<INSTALL_INTERFACE:include>)
+ ${Python3_INCLUDE_DIRS})
+ target_link_directories(onnx_cpp2py_export PRIVATE
+ ${Python3_LIBRARY_DIRS})
+ target_link_libraries(onnx_cpp2py_export PRIVATE
+ ${Python3_LIBRARIES})
-list(APPEND CMAKE_PREFIX_PATH "@PROTOBUF_DIR@")
-set(Protobuf_INCLUDE_DIR "@PROTOBUF_INCLUDE_DIR@")
-find_package(Protobuf REQUIRED)
+include(CMakeFindDependencyMacro)
+find_dependency(protobuf CONFIG)

# pybind11 is a header only lib
- find_package(pybind11 2.2 CONFIG)
+ find_package(pybind11 2.2 CONFIG REQUIRED)
if(NOT pybind11_FOUND)
if(EXISTS "${ONNX_ROOT}/third_party/pybind11/include/pybind11/pybind11.h")
add_subdirectory("${ONNX_ROOT}/third_party/pybind11")
# import targets
include ("${CMAKE_CURRENT_LIST_DIR}/ONNXTargets.cmake")
12 changes: 0 additions & 12 deletions ports/onnx/fix-dependency-protobuf.patch

This file was deleted.

7 changes: 1 addition & 6 deletions ports/onnx/portfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@ vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO onnx/onnx
REF "v${VERSION}"
SHA512 b46a4ab70af88053318eba45251c1f71528f15e45a33042877570e8d857febd3ec66e2e811fcda2105a4f17b84c9a1c6a0aaa22756c3287321b3ea29e83127fd
SHA512 ef641447d8d6c4ed9f083793fe14a8568d6aa7b9b7e7b859a4082e9b892acd801230da2027d097ceaa0d68bbd37b2422b89bb7d1d55d5c3b5955c0f9c7c657c5
PATCHES
fix-cmakelists.patch
fix-dependency-protobuf.patch
)

string(COMPARE EQUAL "${VCPKG_CRT_LINKAGE}" "static" USE_STATIC_RUNTIME)
Expand Down Expand Up @@ -64,10 +63,6 @@ vcpkg_cmake_install()
vcpkg_cmake_config_fixup(CONFIG_PATH lib/cmake/ONNX)

vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE")
vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/share/${PORT}/ONNXConfig.cmake" "# import targets"
[[# import targets
include(CMakeFindDependencyMacro)
find_dependency(protobuf CONFIG)]])

file(REMOVE_RECURSE
"${CURRENT_PACKAGES_DIR}/debug/include"
Expand Down
3 changes: 1 addition & 2 deletions ports/onnx/vcpkg.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"name": "onnx",
"version-semver": "1.15.0",
"port-version": 1,
"version-semver": "1.16.0",
"description": "Open standard for machine learning interoperability",
"homepage": "https://onnx.ai",
"license": "Apache-2.0",
Expand Down
104 changes: 2 additions & 102 deletions ports/onnxruntime-gpu/portfile.cmake
Original file line number Diff line number Diff line change
@@ -1,102 +1,2 @@
vcpkg_check_linkage(ONLY_DYNAMIC_LIBRARY)

vcpkg_download_distfile(ARCHIVE
URLS "https://github.com/microsoft/onnxruntime/releases/download/v${VERSION}/onnxruntime-win-x64-gpu-${VERSION}.zip"
FILENAME "onnxruntime-win-x64-gpu-${VERSION}.zip"
SHA512 7ab350a2ede0fc8c716cf083e16a9303acbcc855982e53900f8843773ec32fd20a7396f0bde82bb29f382012b1d05dea41708797f112d9096c8b5048fc5eb7d8
)

vcpkg_extract_source_archive(
SOURCE_PATH
ARCHIVE "${ARCHIVE}"
NO_REMOVE_ONE_LEVEL
)

# Download repo for experimental features
vcpkg_from_github(
OUT_SOURCE_PATH REPO_PATH
REPO microsoft/onnxruntime
REF v${VERSION}
SHA512 f2fec4ded88da6bf67ae7d0aa3082736cb3b8ba29e723b5a516d7632b68ce02aed461f24d3e82cbab20757729e0ab45d736bd986c9b7395f2879b16a091c12a1
)

file(COPY
${REPO_PATH}/include/onnxruntime/core/session/experimental_onnxruntime_cxx_api.h
${REPO_PATH}/include/onnxruntime/core/session/experimental_onnxruntime_cxx_inline.h
DESTINATION ${CURRENT_PACKAGES_DIR}/include
)

file(MAKE_DIRECTORY
${CURRENT_PACKAGES_DIR}/include
${CURRENT_PACKAGES_DIR}/lib
${CURRENT_PACKAGES_DIR}/bin
${CURRENT_PACKAGES_DIR}/debug/lib
${CURRENT_PACKAGES_DIR}/debug/bin
)

file(COPY
${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/include
DESTINATION ${CURRENT_PACKAGES_DIR}
)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime.lib
DESTINATION ${CURRENT_PACKAGES_DIR}/lib)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime.lib
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/lib)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime.pdb
DESTINATION ${CURRENT_PACKAGES_DIR}/bin)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime.pdb
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/bin)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_cuda.lib
DESTINATION ${CURRENT_PACKAGES_DIR}/lib)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_cuda.lib
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/lib)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_cuda.pdb
DESTINATION ${CURRENT_PACKAGES_DIR}/bin)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_cuda.pdb
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/bin)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_shared.pdb
DESTINATION ${CURRENT_PACKAGES_DIR}/bin)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_shared.pdb
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/bin)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_tensorrt.pdb
DESTINATION ${CURRENT_PACKAGES_DIR}/bin)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_tensorrt.pdb
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/bin)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_shared.lib
DESTINATION ${CURRENT_PACKAGES_DIR}/lib)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_shared.lib
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/lib)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_tensorrt.lib
DESTINATION ${CURRENT_PACKAGES_DIR}/lib)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_tensorrt.lib
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/lib)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_shared.dll
DESTINATION ${CURRENT_PACKAGES_DIR}/bin)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_shared.dll
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/bin)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime.dll
DESTINATION ${CURRENT_PACKAGES_DIR}/bin)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime.dll
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/bin)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_tensorrt.dll
DESTINATION ${CURRENT_PACKAGES_DIR}/bin)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_tensorrt.dll
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/bin)

file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_cuda.dll
DESTINATION ${CURRENT_PACKAGES_DIR}/bin)
file(COPY ${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/lib/onnxruntime_providers_cuda.dll
DESTINATION ${CURRENT_PACKAGES_DIR}/debug/bin)
# # Handle copyright
vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/onnxruntime-win-x64-gpu-${VERSION}/LICENSE")
set(VCPKG_POLICY_EMPTY_PACKAGE enabled)
message(WARNING "${PORT} is deprecated. Please use port onnxruntime instead.")
14 changes: 11 additions & 3 deletions ports/onnxruntime-gpu/vcpkg.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
{
"name": "onnxruntime-gpu",
"version": "1.16.3",
"description": "onnxruntime (GPU)",
"version": "1.18.0",
"description": "Build onnxruntime port with all available GPU related features",
"homepage": "https://github.com/microsoft/onnxruntime",
"license": "MIT",
"supports": "windows & !x86 & !uwp & !static & !arm"
"supports": "windows & !x86 & !uwp & !static & !arm",
"dependencies": [
{
"name": "onnxruntime",
"features": [
"cuda"
]
}
]
}
70 changes: 70 additions & 0 deletions ports/onnxruntime/fix-clang-cl-simd-compile.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake
index 682dcfc..405c65b 100644
--- a/cmake/onnxruntime_mlas.cmake
+++ b/cmake/onnxruntime_mlas.cmake
@@ -158,15 +158,31 @@ function(setup_mlas_source_for_windows)
)
set_source_files_properties(${mlas_platform_srcs_avx2} PROPERTIES COMPILE_FLAGS "/arch:AVX2")

+ file(GLOB_RECURSE mlas_platform_srcs_avx512 CONFIGURE_DEPENDS
+ "${MLAS_SRC_DIR}/intrinsics/avx512/*.cpp"
+ )
+ set(mlas_platform_srcs_amx "${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp")
+
+ # clang-cl requires us to enable the platform feature flags explicitly to compile the intrinsics code
+ # unlike MSVC. See: https://github.com/llvm/llvm-project/issues/53520
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ set_source_files_properties(${mlas_platform_srcs_avx512} PROPERTIES COMPILE_FLAGS "/arch:AVX512")
+ set_source_files_properties(${mlas_platform_srcs_amx} PROPERTIES COMPILE_FLAGS "/arch:AVX512 -mamx-tile -mamx-int8")
+ # https://clang.llvm.org/docs/UsersManual.html#cpu-architectures-features-and-limitations
+ set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_sse.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64")
+ set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64-v2")
+ endif()
+
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/dgemm.cpp
${mlas_platform_srcs_avx}
${mlas_platform_srcs_avx2}
- ${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp
+ ${mlas_platform_srcs_avx512}
+ ${mlas_platform_srcs_amx}
+ # ...
${MLAS_SRC_DIR}/qgemm_kernel_avx2.cpp
${MLAS_SRC_DIR}/qgemm_kernel_sse.cpp
${MLAS_SRC_DIR}/qgemm_kernel_sse41.cpp
- ${MLAS_SRC_DIR}/intrinsics/avx512/quantize_avx512f.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx2.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx512.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx512vnni.cpp
@@ -208,9 +224,15 @@ function(setup_mlas_source_for_windows)
${MLAS_SRC_DIR}/amd64/ErfKernelFma3.asm
)
if (NOT onnxruntime_ORT_MINIMAL_BUILD)
- target_sources(onnxruntime_mlas PRIVATE
+ set(onnxruntime_mlas_q4gemm_avx512
${MLAS_SRC_DIR}/q4gemm_avx512.cpp
)
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ # clang-cl requires us to enable the platform feature flags explicitly to compile the intrinsics code
+ # unlike MSVC. See: https://github.com/llvm/llvm-project/issues/53520
+ set_source_files_properties(${onnxruntime_mlas_q4gemm_avx512} PROPERTIES COMPILE_FLAGS "/arch:AVX512 -mavx512vnni")
+ endif()
+ target_sources(onnxruntime_mlas PRIVATE ${onnxruntime_mlas_q4gemm_avx512})
endif()
else()
target_sources(onnxruntime_mlas PRIVATE
diff --git a/onnxruntime/core/mlas/lib/qgemm_kernel_sse41.cpp b/onnxruntime/core/mlas/lib/qgemm_kernel_sse41.cpp
index 68931c5..6c095bd 100644
--- a/onnxruntime/core/mlas/lib/qgemm_kernel_sse41.cpp
+++ b/onnxruntime/core/mlas/lib/qgemm_kernel_sse41.cpp
@@ -16,6 +16,10 @@ Abstract:

#include "mlasi.h"
#include "qgemm.h"
+#if defined(__clang__)
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#endif

// N.B. MSVC does not require turning on SSE 4.1 intrinsics and the current use
// for this code is Windows only, so restrict this kernel to that environment.
Loading
Loading