Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify the image operations in extensions library #831

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
6 changes: 3 additions & 3 deletions .pipelines/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ stages:
# compiled as only one operator selected.
- bash: |
set -e -x -u
./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
./build.sh -DOCOS_ENABLE_C_API=ON
cd out/Linux/RelWithDebInfo
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests
Expand Down Expand Up @@ -281,7 +281,7 @@ stages:
# compiled as only one operator selected.
- bash: |
set -e -x -u
./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
./build.sh -DOCOS_ENABLE_C_API=ON
cd out/Darwin/RelWithDebInfo
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests
Expand Down Expand Up @@ -431,7 +431,7 @@ stages:

steps:
- script: |
call .\build.bat -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
call .\build.bat -DOCOS_ENABLE_C_API=ON
cd out\Windows
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests
Expand Down
3 changes: 1 addition & 2 deletions .pyproject/cmdclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,7 @@ def build_cmake(self, extension):
# Disabling OpenCV can drastically reduce the build time.
cmake_args += [
'-DOCOS_ENABLE_OPENCV_CODECS=OFF',
'-DOCOS_ENABLE_CV2=OFF',
'-DOCOS_ENABLE_VISION=OFF']
'-DOCOS_ENABLE_CV2=OFF']

if self.pp_api:
if not self.no_opencv:
Expand Down
47 changes: 22 additions & 25 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ option(OCOS_ENABLE_BLINGFIRE "Enable operators depending on the Blingfire librar
option(OCOS_ENABLE_MATH "Enable math tensor operators building" ON)
option(OCOS_ENABLE_DLIB "Enable operators like Inverse depending on DLIB" ON)
option(OCOS_ENABLE_VENDOR_IMAGE_CODECS "Enable and use vendor image codecs if supported over libpng & libjpeg" OFF)
option(OCOS_ENABLE_OPENCV_CODECS "Enable cv2 and vision operators that require opencv imgcodecs." ON)
option(OCOS_ENABLE_CV2 "Enable the operators in `operators/cv2`" ON)
option(OCOS_ENABLE_OPENCV_CODECS "Enable cv2 and vision operators that require opencv imgcodecs." OFF)
option(OCOS_ENABLE_CV2 "Enable the operators in `operators/cv2`" OFF)
option(OCOS_ENABLE_VISION "Enable the operators in `operators/vision`" ON)
option(OCOS_ENABLE_AUDIO "Enable the operators for audio processing" ON)
option(OCOS_ENABLE_AZURE "Enable the operators for azure execution provider" OFF)
Expand Down Expand Up @@ -383,7 +383,7 @@ if (OCOS_USE_CUDA)
endif()

# enable the opencv dependency if we have ops that require it
if(OCOS_ENABLE_CV2 OR OCOS_ENABLE_VISION)
if(OCOS_ENABLE_CV2)
set(_ENABLE_OPENCV ON)
message(STATUS "Fetch opencv")
include(opencv)
Expand All @@ -402,10 +402,6 @@ if(OCOS_ENABLE_CV2)
endif()

if(OCOS_ENABLE_VISION)
if(NOT OCOS_ENABLE_OPENCV_CODECS)
message(FATAL_ERROR "OCOS_ENABLE_VISION requires OCOS_ENABLE_OPENCV_CODECS to be ON")
endif()

file(GLOB TARGET_SRC_VISION "operators/vision/*.cc" "operators/vision/*.h*")
list(APPEND TARGET_SRC ${TARGET_SRC_VISION})
endif()
Expand Down Expand Up @@ -653,6 +649,25 @@ endif()

# Vision operators: add the ENABLE_VISION compile definition and wire up the image codec
# dependencies (vendor codecs and/or the libpng + libjpeg sources pulled in by ext_imgcodecs).
if(OCOS_ENABLE_VISION)
list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_VISION)
# Start from "libpng/libjpeg are needed"; the vendor-codec branches below switch this OFF.
set(_DEFAULT_CODEC_ENABLE ON)
if(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
add_compile_definitions(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
if(WIN32)
# Use WIC on Windows. Nothing to be done
set(_DEFAULT_CODEC_ENABLE OFF)
elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
# Use ImageIO on Apple platforms
set(_DEFAULT_CODEC_ENABLE OFF)
target_link_libraries(ocos_operators PRIVATE "-framework CoreFoundation" "-framework CoreGraphics" "-framework ImageIO")
endif()
endif()

# The next line unconditionally forces the flag back to ON, so the OFF assignments in the
# vendor-codec branches above currently have no effect and libpng/libjpeg are always included.
# NOTE(review): presumably a temporary override until EncodeImage no longer needs
# libpng/libjpeg directly - remove it once that is the case.
set(_DEFAULT_CODEC_ENABLE ON) # libpng and libjpeg can be optional after EncodeImage with native support too.
if(_DEFAULT_CODEC_ENABLE)
include(ext_imgcodecs)
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
endif()
endif()

if(OCOS_ENABLE_AZURE)
Expand Down Expand Up @@ -740,24 +755,6 @@ if(OCOS_ENABLE_C_API)
if(OCOS_ENABLE_DLIB)
file(GLOB cv2_TARGET_SRC "shared/api/c_api_processor.*" "shared/api/image_*.*")
list(APPEND _TARGET_LIB_SRC ${cv2_TARGET_SRC})
if(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
add_compile_definitions(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
if(WIN32)
# Use WIC on Windows. Nothing to be done
elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
# Use ImageIO on Apple platforms
target_link_libraries(ocos_operators PRIVATE "-framework CoreFoundation" "-framework CoreGraphics" "-framework ImageIO")
else()
# Fallback to libpng & libjpeg on all other platforms
include(ext_imgcodecs)
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
endif()
else()
include(ext_imgcodecs)
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
endif()
endif()
endif()

Expand Down
4 changes: 1 addition & 3 deletions cmake/presets/ort_genai.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_DLIB ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_OPENCV_CODECS OFF CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_CV2 OFF CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_VISION OFF CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_VISION ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_VENDOR_IMAGE_CODECS ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_MATH ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_AUDIO ON CACHE INTERNAL "" FORCE)
Expand Down
1 change: 1 addition & 0 deletions cmake/presets/token_api_only.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@

set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE)
set(OCOS_BUILD_SHARED_LIB OFF CACHE INTERNAL "" FORCE)
40 changes: 0 additions & 40 deletions operators/vision/decode_image.cc

This file was deleted.

50 changes: 42 additions & 8 deletions operators/vision/decode_image.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,53 @@

#pragma once

#include "ocos.h"
#include "string_utils.h"
#include <map>

#include <cstdint>
#include "ext_status.h"
#include "op_def_struct.h"

#if OCOS_ENABLE_VENDOR_IMAGE_CODECS
#if WIN32
#include "image_decoder_win32.hpp"
#elif __APPLE__
#include "image_decoder_darwin.hpp"
#else
#include "image_decoder.hpp"
#endif
#else
#include "image_decoder.hpp"
#endif

namespace ort_extensions {
// NOTE(review): this listing looks like a rendered diff without +/- markers. The prototype
// of `decode_image` and the `KernelDecodeImage` struct further down appear to be removed
// lines interleaved with the new `DecodeImage` struct - confirm against the real header.
//
// Operator wrapper around internal::DecodeImage: it runs the base decoder and then, when
// is_bgr_ is set, swaps channel 0 and channel 2 of every pixel in place.
struct DecodeImage: public internal::DecodeImage {
// Sets is_bgr_ and initialises the base decoder with an empty attribute map.
// `api` and `info` are not read in this function.
OrtStatusPtr OnModelAttach(const OrtApi& api, const OrtKernelInfo& info) {
is_bgr_ = true;
return internal::DecodeImage::Init(std::map<std::string, std::string>());
}

// Decodes `input` into `output` via the base class and returns the base class status.
// On failure the status is returned immediately and no channel swap is attempted.
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const{
auto status = internal::DecodeImage::Compute(input, output);
if (!status.IsOk()) {
return status;
}

if (is_bgr_) {
// The base decoder output is treated as RGB; swap to BGR for backward compatibility.
// NOTE(review): assumes the output shape is {H, W, 3} - the loop below uses dimensions[0],
// dimensions[1] and a fixed stride of 3 without re-checking the shape; confirm the base
// decoder guarantees this.
const auto& dimensions = output.Shape();
// const_cast is what permits the in-place write through the pointer returned by Data().
uint8_t* rgb_data = const_cast<uint8_t*>(output.Data());
// do an inplace swap of the channels
// NOTE(review): std::swap is used here but no <utility>/<algorithm> include is visible in
// this header - presumably it arrives transitively; confirm.
for (int y = 0; y < dimensions[0]; ++y) {
for (int x = 0; x < dimensions[1]; ++x) {
std::swap(rgb_data[(y * dimensions[1] + x) * 3 + 0], rgb_data[(y * dimensions[1] + x) * 3 + 2]);
}
}
}

void decode_image(const ortc::Tensor<uint8_t>& input,
ortc::Tensor<uint8_t>& output);
return status;
}

struct KernelDecodeImage : BaseKernel {
KernelDecodeImage(const OrtApi& api, const OrtKernelInfo& info) : BaseKernel(api, info) {}
void Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const;
private:
bool is_bgr_{}; // when true, Compute swaps channels 0 and 2 so the output is in BGR order
};

} // namespace ort_extensions
118 changes: 101 additions & 17 deletions operators/vision/encode_image.cc
Original file line number Diff line number Diff line change
@@ -1,40 +1,124 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "png.h"
#include "jpeglib.h"
#include "op_def_struct.h"
#include "ext_status.h"

#include "encode_image.hpp"

#include <opencv2/imgcodecs.hpp>

namespace ort_extensions {

// Encodes a rank-3 {H, W, 3} uint8 image tensor into a 1-D uint8 tensor of encoded bytes.
// The codec is chosen from extension_: ".jpg" goes through libjpeg, ".png" through libpng;
// any other value raises ORT_INVALID_ARGUMENT via ORTX_CXX_API_THROW, as does a wrong input
// shape or a libpng failure.
//
// NOTE(review): this listing looks like a rendered diff without +/- markers. The duplicate
// `dimensions_bgr` declaration, the cv::Mat / cv::imencode lines and the `encoded_image`
// uses appear to be removed OpenCV-era lines interleaved with the new implementation, and
// a pasted review comment sits inside the channel-swap loop - confirm against the real file.
void KernelEncodeImage::Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
// Setup inputs
const auto dimensions_bgr = input.Shape();

const auto& dimensions_bgr = input.Shape();
if (dimensions_bgr.size() != 3 || dimensions_bgr[2] != 3) {
// expect {H, W, C} as that's the inverse of what decode_image produces.
// we have no way to check if it's BGR or RGB though
ORTX_CXX_API_THROW("[EncodeImage] requires rank 3 BGR input in channels last format.", ORT_INVALID_ARGUMENT);
}

// Get data & the length
// NOTE(review): H and W are narrowed to int32_t and multiplied as int below; very large
// images could overflow - confirm the expected size range.
std::vector<int32_t> height_x_width{static_cast<int32_t>(dimensions_bgr[0]), // H
static_cast<int32_t>(dimensions_bgr[1])}; // W
const int color_space = 3;
const uint8_t* bgr_data = input.Data();
// outbuffer/outsize describe the encoded bytes for both codecs: libjpeg fills them through
// jpeg_mem_dest, while the PNG path points them at png_buffer once encoding is finished.
unsigned char* outbuffer = nullptr;
std::vector<uint8_t> png_buffer;
size_t outsize = 0;

// Build an RGB copy of the BGR input by swapping channels 0 and 2; both encoders below
// are configured for RGB input (JCS_RGB / PNG_COLOR_TYPE_RGB).
auto rgb_data = std::make_unique<uint8_t[]>(height_x_width[0] * height_x_width[1] * color_space);
for (int y = 0; y < height_x_width[0]; ++y) {
for (int x = 0; x < height_x_width[1]; ++x) {
rgb_data[(y * height_x_width[1] + x) * color_space + 0] = bgr_data[(y * height_x_width[1] + x) * color_space + 2];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps we can save height_x_width[1] in a variable here since it is called many times, to save compute/time to access the vector each time.

rgb_data[(y * height_x_width[1] + x) * color_space + 1] = bgr_data[(y * height_x_width[1] + x) * color_space + 1];
rgb_data[(y * height_x_width[1] + x) * color_space + 2] = bgr_data[(y * height_x_width[1] + x) * color_space + 0];
}
}

if (extension_ == ".jpg") {
struct jpeg_compress_struct cinfo;
struct jpeg_error_mgr jerr;

// NOTE(review): only the default libjpeg error manager is installed; presumably its
// error_exit terminates the process instead of returning - confirm whether a custom
// handler is needed here.
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_compress(&cinfo);
// Encode to memory; the resulting buffer is released with free() at the end of this function.
jpeg_mem_dest(&cinfo, &outbuffer, &outsize);

cinfo.image_width = height_x_width[1];
cinfo.image_height = height_x_width[0];
cinfo.input_components = color_space;
cinfo.in_color_space = JCS_RGB;

// compression parameters are chosen to be compatible with opencv
jpeg_set_defaults(&cinfo);
jpeg_set_quality(&cinfo, 95, TRUE);
cinfo.optimize_coding = FALSE;
cinfo.restart_interval = 0;
cinfo.q_scale_factor[0] = jpeg_quality_scaling(-1);
cinfo.q_scale_factor[1] = jpeg_quality_scaling(-1);

// Component 0 gets a 2x2 sampling factor (both nibbles extracted below evaluate to 2),
// component 1 gets 1x1; component 2 is not set explicitly here.
const int sampling_factor = 0x221111; // 4:2:0 IMWRITE_JPEG_SAMPLING_FACTOR_420
cinfo.comp_info[0].v_samp_factor = (sampling_factor >> 16 ) & 0xF;
cinfo.comp_info[0].h_samp_factor = (sampling_factor >> 20 ) & 0xF;
cinfo.comp_info[1].v_samp_factor = 1;
cinfo.comp_info[1].h_samp_factor = 1;
// jpeg_default_qtables( &cinfo, TRUE );

jpeg_start_compress(&cinfo, TRUE);

// data is const uint8_t but opencv2 wants void*.
const void* bgr_data = input.Data();
const cv::Mat bgr_image(height_x_width, CV_8UC3, const_cast<void*>(bgr_data));
// Feed the encoder one scanline at a time from the RGB copy.
JSAMPROW row_pointer[1];
while (cinfo.next_scanline < cinfo.image_height) {
row_pointer[0] = (JSAMPROW)&rgb_data[cinfo.next_scanline * cinfo.image_width * color_space];
jpeg_write_scanlines(&cinfo, row_pointer, 1);
}

// don't know output size ahead of time so need to encode and then copy to output
std::vector<uint8_t> encoded_image;
if (!cv::imencode(extension_, bgr_image, encoded_image)) {
ORTX_CXX_API_THROW("[EncodeImage] Image encoding failed.", ORT_INVALID_ARGUMENT);
jpeg_finish_compress(&cinfo);
jpeg_destroy_compress(&cinfo);
} else if (extension_ == ".png") {
png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
if (!png_ptr) {
ORTX_CXX_API_THROW("[EncodeImage] PNG create write struct failed.", ORT_INVALID_ARGUMENT);
}

png_infop info_ptr = png_create_info_struct(png_ptr);
if (!info_ptr) {
png_destroy_write_struct(&png_ptr, nullptr);
ORTX_CXX_API_THROW("[EncodeImage] PNG create info struct failed.", ORT_INVALID_ARGUMENT);
}

// libpng error path: a non-zero setjmp result means a later png_* call jumped back here,
// so the write struct is destroyed before the failure is raised.
if (setjmp(png_jmpbuf(png_ptr))) {
png_destroy_write_struct(&png_ptr, &info_ptr);
ORTX_CXX_API_THROW("[EncodeImage] PNG encoding failed.", ORT_INVALID_ARGUMENT);
}

// Custom write callback: append every chunk libpng emits to png_buffer; no flush callback.
// NOTE(review): vector::insert can throw from inside this C callback - confirm that is
// acceptable for the libpng build in use.
png_set_write_fn(png_ptr, &png_buffer, [](png_structp png_ptr, png_bytep data, png_size_t length) {
auto p = reinterpret_cast<std::vector<uint8_t>*>(png_get_io_ptr(png_ptr));
p->insert(p->end(), data, data + length);
}, nullptr);

// 8-bit RGB, non-interlaced, default compression and filter types.
png_set_IHDR(png_ptr, info_ptr, height_x_width[1], height_x_width[0], 8, PNG_COLOR_TYPE_RGB,
PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);

png_write_info(png_ptr, info_ptr);

for (int y = 0; y < height_x_width[0]; ++y) {
png_write_row(png_ptr, (png_bytep)&rgb_data[y * height_x_width[1] * color_space]);
}

png_write_end(png_ptr, info_ptr);
png_destroy_write_struct(&png_ptr, &info_ptr);

// Expose the accumulated PNG bytes through the shared outbuffer/outsize pair.
outbuffer = png_buffer.data();
outsize = png_buffer.size();
} else {
ORTX_CXX_API_THROW("[EncodeImage] Unsupported image format.", ORT_INVALID_ARGUMENT);
}

// Setup output & copy to destination
std::vector<int64_t> output_dimensions{static_cast<int64_t>(encoded_image.size())};
std::vector<int64_t> output_dimensions{static_cast<int64_t>(outsize)};
uint8_t* data = output.Allocate(output_dimensions);
memcpy(data, encoded_image.data(), encoded_image.size());
memcpy(data, outbuffer, outsize);

// Only the libjpeg path hands back a separately owned buffer; the PNG bytes live in
// png_buffer and are released by its destructor.
if (outbuffer != png_buffer.data() && outbuffer != nullptr) {
free(outbuffer);
}
}

} // namespace ort_extensions
Loading
Loading