microsoft · wenbingl · Oct 22, 2024 · Oct 22, 2024 · Oct 22, 2024 · Oct 22, 2024
@@ -197,7 +197,7 @@ stages:
     # compiled as only one operator selected.
     - bash: |
         set -e -x -u
-        ./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
+        ./build.sh -DOCOS_ENABLE_C_API=ON
         cd out/Linux/RelWithDebInfo
         ctest -C RelWithDebInfo --output-on-failure
       displayName: Build ort-extensions with API enabled and run tests
@@ -281,7 +281,7 @@ stages:
     # compiled as only one operator selected.
     - bash: |
         set -e -x -u
-        ./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
+        ./build.sh -DOCOS_ENABLE_C_API=ON
         cd out/Darwin/RelWithDebInfo
         ctest -C RelWithDebInfo --output-on-failure
       displayName: Build ort-extensions with API enabled and run tests
@@ -431,7 +431,7 @@ stages:
 
     steps:
     - script: |
-        call .\build.bat -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
+        call .\build.bat -DOCOS_ENABLE_C_API=ON
         cd out\Windows
         ctest -C RelWithDebInfo --output-on-failure
       displayName: Build ort-extensions with API enabled and run tests

@@ -208,8 +208,7 @@ def build_cmake(self, extension):
             # Disabling openCV can drastically reduce the build time.
             cmake_args += [
                 '-DOCOS_ENABLE_OPENCV_CODECS=OFF',
-                '-DOCOS_ENABLE_CV2=OFF',
-                '-DOCOS_ENABLE_VISION=OFF']
+                '-DOCOS_ENABLE_CV2=OFF']
 
         if self.pp_api:
             if not self.no_opencv:

@@ -72,8 +72,8 @@ option(OCOS_ENABLE_BLINGFIRE "Enable operators depending on the Blingfire librar
 option(OCOS_ENABLE_MATH "Enable math tensor operators building" ON)
 option(OCOS_ENABLE_DLIB "Enable operators like Inverse depending on DLIB" ON)
 option(OCOS_ENABLE_VENDOR_IMAGE_CODECS "Enable and use vendor image codecs if supported over libpng & libjpeg" OFF)
-option(OCOS_ENABLE_OPENCV_CODECS "Enable cv2 and vision operators that require opencv imgcodecs." ON)
-option(OCOS_ENABLE_CV2 "Enable the operators in `operators/cv2`" ON)
+option(OCOS_ENABLE_OPENCV_CODECS "Enable cv2 operators that require opencv imgcodecs." OFF)
+option(OCOS_ENABLE_CV2 "Enable the operators in `operators/cv2`" OFF)
 option(OCOS_ENABLE_VISION "Enable the operators in `operators/vision`" ON)
 option(OCOS_ENABLE_AUDIO "Enable the operators for audio processing" ON)
 option(OCOS_ENABLE_AZURE "Enable the operators for azure execution provider" OFF)
@@ -383,7 +383,7 @@ if (OCOS_USE_CUDA)
 endif()
 
 # enable the opencv dependency if we have ops that require it
-if(OCOS_ENABLE_CV2 OR OCOS_ENABLE_VISION)
+if(OCOS_ENABLE_CV2)
   set(_ENABLE_OPENCV ON)
   message(STATUS "Fetch opencv")
   include(opencv)
@@ -402,10 +402,6 @@ if(OCOS_ENABLE_CV2)
 endif()
 
 if(OCOS_ENABLE_VISION)
-  if(NOT OCOS_ENABLE_OPENCV_CODECS)
-    message(FATAL_ERROR "OCOS_ENABLE_VISION requires OCOS_ENABLE_OPENCV_CODECS to be ON")
-  endif()
-
   file(GLOB TARGET_SRC_VISION "operators/vision/*.cc" "operators/vision/*.h*")
   list(APPEND TARGET_SRC ${TARGET_SRC_VISION})
 endif()
@@ -653,6 +649,25 @@ endif()
 
 if(OCOS_ENABLE_VISION)
   list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_VISION)
+  set(_DEFAULT_CODEC_ENABLE ON)
+  if(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
+    add_compile_definitions(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
+    if(WIN32)
+      # Use WIC on Windows; no extra libraries are linked in this branch.
+      set(_DEFAULT_CODEC_ENABLE OFF)
+    elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
+      # Use ImageIO on Apple platforms, which requires linking the frameworks listed below.
+      set(_DEFAULT_CODEC_ENABLE OFF)
+      target_link_libraries(ocos_operators PRIVATE "-framework CoreFoundation" "-framework CoreGraphics" "-framework ImageIO")
+    endif()
+  endif()
+
+  set(_DEFAULT_CODEC_ENABLE ON) # Overrides the OFF values assigned above: libpng and libjpeg are always built for now and can only become optional once EncodeImage has native codec support too.
+  if(_DEFAULT_CODEC_ENABLE)
+    include(ext_imgcodecs)  
+    target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
+    target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
+  endif()
 endif()
 
 if(OCOS_ENABLE_AZURE)
@@ -740,24 +755,6 @@ if(OCOS_ENABLE_C_API)
   if(OCOS_ENABLE_DLIB)
     file(GLOB cv2_TARGET_SRC "shared/api/c_api_processor.*" "shared/api/image_*.*")
     list(APPEND _TARGET_LIB_SRC ${cv2_TARGET_SRC})
-    if(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
-      add_compile_definitions(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
-      if(WIN32)
-        # Use WIC on Windows. Nothing to be done
-      elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
-        # Use ImageIO on Apple platforms
-        target_link_libraries(ocos_operators PRIVATE "-framework CoreFoundation" "-framework CoreGraphics" "-framework ImageIO")
-      else()
-        # Fallback to libpng & libjpeg on all other platforms
-        include(ext_imgcodecs)
-        target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
-        target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
-      endif()
-    else()
-      include(ext_imgcodecs)
-      target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
-      target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
-    endif()
   endif()
 endif()
 

@@ -4,9 +4,7 @@
 set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE)
 set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE)
 set(OCOS_ENABLE_DLIB ON CACHE INTERNAL "" FORCE)
-set(OCOS_ENABLE_OPENCV_CODECS OFF CACHE INTERNAL "" FORCE)
-set(OCOS_ENABLE_CV2 OFF CACHE INTERNAL "" FORCE)
-set(OCOS_ENABLE_VISION OFF CACHE INTERNAL "" FORCE)
+set(OCOS_ENABLE_VISION ON CACHE INTERNAL "" FORCE)
 set(OCOS_ENABLE_VENDOR_IMAGE_CODECS ON CACHE INTERNAL "" FORCE)
 set(OCOS_ENABLE_MATH ON CACHE INTERNAL "" FORCE)
 set(OCOS_ENABLE_AUDIO ON CACHE INTERNAL "" FORCE)

@@ -3,3 +3,4 @@
 
 set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE)
 set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE)
+set(OCOS_BUILD_SHARED_LIB OFF CACHE INTERNAL "" FORCE)
@@ -3,19 +3,53 @@
 
 #pragma once
 
-#include "ocos.h"
-#include "string_utils.h"
+#include <map>
 
-#include <cstdint>
+#include "ext_status.h"
+#include "op_def_struct.h"
+
+#if OCOS_ENABLE_VENDOR_IMAGE_CODECS
+  #if defined(_WIN32)  // _WIN32 is predefined by every Windows compiler; plain WIN32 is not guaranteed
+    #include "image_decoder_win32.hpp"
+  #elif defined(__APPLE__)
+    #include "image_decoder_darwin.hpp"
+  #else
+    #include "image_decoder.hpp"
+  #endif
+#else
+  #include "image_decoder.hpp"
+#endif
 
 namespace ort_extensions {
+struct DecodeImage: public internal::DecodeImage {
+  OrtStatusPtr OnModelAttach(const OrtApi& api, const OrtKernelInfo& info) {
+    is_bgr_ = true;
+    return internal::DecodeImage::Init(std::map<std::string, std::string>());
+  }
+
+  OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const{
+    auto status = internal::DecodeImage::Compute(input, output);
+    if (!status.IsOk()) {
+      return status;
+    }
+
+    if (is_bgr_) {
+      // the base decoder output is treated as RGB here; this operator keeps returning BGR for backward compatibility
+      const auto& dimensions = output.Shape();
+      uint8_t* rgb_data = const_cast<uint8_t*>(output.Data());
+      // swap channel 0 and channel 2 of each pixel in place (assumes an H x W x 3 layout — TODO confirm)
+      for (int y = 0; y < dimensions[0]; ++y) {
+        for (int x = 0; x < dimensions[1]; ++x) {
+          std::swap(rgb_data[(y * dimensions[1] + x) * 3 + 0], rgb_data[(y * dimensions[1] + x) * 3 + 2]);
+        }
+      }
+    }
 
-void decode_image(const ortc::Tensor<uint8_t>& input,
-                  ortc::Tensor<uint8_t>& output);
+    return status;
+  }
 
-struct KernelDecodeImage : BaseKernel {
-  KernelDecodeImage(const OrtApi& api, const OrtKernelInfo& info) : BaseKernel(api, info) {}
-  void Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const;
+  private:
+    bool is_bgr_{};  // when true, Compute swaps channels 0 and 2 of the decoded image; set to true in OnModelAttach
 };
 
 }  // namespace ort_extensions
@@ -1,40 +1,150 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include "png.h"
+#include "jpeglib.h"
+#include "op_def_struct.h"
+#include "ext_status.h"
+
 #include "encode_image.hpp"
 
-#include <opencv2/imgcodecs.hpp>
 
 namespace ort_extensions {
 
+// Encodes an {H, W, 3} channels-last BGR uint8 tensor as JPEG (".jpg") or PNG (".png"), selected by
+// extension_, and writes the encoded bytes into a rank-1 uint8 output tensor.
+// Throws ORT_INVALID_ARGUMENT for a wrong input shape, an empty or oversized image, an unsupported
+// extension, or a libpng failure.
 void KernelEncodeImage::Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
-  // Setup inputs
-  const auto dimensions_bgr = input.Shape();
-
+  const auto& dimensions_bgr = input.Shape();
   if (dimensions_bgr.size() != 3 || dimensions_bgr[2] != 3) {
-    // expect {H, W, C} as that's the inverse of what decode_image produces.
-    // we have no way to check if it's BGR or RGB though
     ORTX_CXX_API_THROW("[EncodeImage] requires rank 3 BGR input in channels last format.", ORT_INVALID_ARGUMENT);
   }
 
-  // Get data & the length
   std::vector<int32_t> height_x_width{static_cast<int32_t>(dimensions_bgr[0]),   // H
                                       static_cast<int32_t>(dimensions_bgr[1])};  // W
+  // Reject empty images and dimensions that were truncated by the int32 casts above; the encoders
+  // below would otherwise be handed a zero or wrapped size.
+  if (height_x_width[0] <= 0 || height_x_width[1] <= 0 ||
+      height_x_width[0] != dimensions_bgr[0] || height_x_width[1] != dimensions_bgr[1]) {
+    ORTX_CXX_API_THROW("[EncodeImage] image height and width must be positive 32-bit values.",
+                       ORT_INVALID_ARGUMENT);
+  }
+
+  const int color_space = 3;
+  const uint8_t* bgr_data = input.Data();
+  unsigned char* outbuffer = nullptr;
+  std::vector<uint8_t> png_buffer;
+  size_t outsize = 0;
+
+  // Both encoders are fed RGB, so build a channel-swapped copy of the BGR input.
+  // All sizes and offsets are computed in size_t so that large images cannot overflow 32-bit arithmetic.
+  const size_t width = static_cast<size_t>(height_x_width[1]);
+  const size_t row_stride = width * color_space;
+  const size_t pixel_count = static_cast<size_t>(height_x_width[0]) * width;
+  auto rgb_data = std::make_unique<uint8_t[]>(pixel_count * color_space);
+  for (size_t i = 0; i < pixel_count; ++i) {
+    const uint8_t* src = bgr_data + i * color_space;
+    uint8_t* dst = rgb_data.get() + i * color_space;
+    dst[0] = src[2];
+    dst[1] = src[1];
+    dst[2] = src[0];
+  }
+
+  if (extension_ == ".jpg") {
+    // libjpeg cannot represent images larger than JPEG_MAX_DIMENSION in either direction.
+    if (height_x_width[0] > JPEG_MAX_DIMENSION || height_x_width[1] > JPEG_MAX_DIMENSION) {
+      ORTX_CXX_API_THROW("[EncodeImage] image is too large to be encoded as JPEG.", ORT_INVALID_ARGUMENT);
+    }
+
+    struct jpeg_compress_struct cinfo;
+    struct jpeg_error_mgr jerr;
+
+    // NOTE(review): jpeg_std_error installs libjpeg's default error_exit, which terminates the
+    // process on a fatal error; the checks above keep the known invalid-size cases from reaching it.
+    cinfo.err = jpeg_std_error(&jerr);
+    jpeg_create_compress(&cinfo);
+    jpeg_mem_dest(&cinfo, &outbuffer, &outsize);
+
+    cinfo.image_width = height_x_width[1];
+    cinfo.image_height = height_x_width[0];
+    cinfo.input_components = color_space;
+    cinfo.in_color_space = JCS_RGB;
+
+    // compression parameters is compatible with opencv
+    jpeg_set_defaults(&cinfo);
+    jpeg_set_quality(&cinfo, 95, TRUE);
+    cinfo.optimize_coding = FALSE;
+    cinfo.restart_interval = 0;
+    cinfo.q_scale_factor[0] = jpeg_quality_scaling(-1);
+    cinfo.q_scale_factor[1] = jpeg_quality_scaling(-1);
+
+    const int sampling_factor = 0x221111; // 4:2:0  IMWRITE_JPEG_SAMPLING_FACTOR_420
+    cinfo.comp_info[0].v_samp_factor = (sampling_factor >> 16 ) & 0xF;
+    cinfo.comp_info[0].h_samp_factor = (sampling_factor >> 20 ) & 0xF;
+    cinfo.comp_info[1].v_samp_factor = 1;
+    cinfo.comp_info[1].h_samp_factor = 1;
+    // jpeg_default_qtables( &cinfo, TRUE );
+
+    jpeg_start_compress(&cinfo, TRUE);
 
-  // data is const uint8_t but opencv2 wants void*.
-  const void* bgr_data = input.Data();
-  const cv::Mat bgr_image(height_x_width, CV_8UC3, const_cast<void*>(bgr_data));
+    JSAMPROW row_pointer[1];
+    while (cinfo.next_scanline < cinfo.image_height) {
+      row_pointer[0] = (JSAMPROW)&rgb_data[cinfo.next_scanline * row_stride];
+      jpeg_write_scanlines(&cinfo, row_pointer, 1);
+    }
 
-  // don't know output size ahead of time so need to encode and then copy to output
-  std::vector<uint8_t> encoded_image;
-  if (!cv::imencode(extension_, bgr_image, encoded_image)) {
-    ORTX_CXX_API_THROW("[EncodeImage] Image encoding failed.", ORT_INVALID_ARGUMENT);
+    jpeg_finish_compress(&cinfo);
+    jpeg_destroy_compress(&cinfo);
+  } else if (extension_ == ".png") {
+    png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
+    if (!png_ptr) {
+      ORTX_CXX_API_THROW("[EncodeImage] PNG create write struct failed.", ORT_INVALID_ARGUMENT);
+    }
+
+    png_infop info_ptr = png_create_info_struct(png_ptr);
+    if (!info_ptr) {
+      png_destroy_write_struct(&png_ptr, nullptr);
+      ORTX_CXX_API_THROW("[EncodeImage] PNG create info struct failed.", ORT_INVALID_ARGUMENT);
+    }
+
+    // libpng reports fatal errors by longjmp-ing back to this point.
+    if (setjmp(png_jmpbuf(png_ptr))) {
+      png_destroy_write_struct(&png_ptr, &info_ptr);
+      ORTX_CXX_API_THROW("[EncodeImage] PNG encoding failed.", ORT_INVALID_ARGUMENT);
+    }
+
+    png_set_write_fn(png_ptr, &png_buffer, [](png_structp png_ptr, png_bytep data, png_size_t length) {
+      auto p = reinterpret_cast<std::vector<uint8_t>*>(png_get_io_ptr(png_ptr));
+      p->insert(p->end(), data, data + length);
+    }, nullptr);
+
+    png_set_IHDR(png_ptr, info_ptr, height_x_width[1], height_x_width[0], 8, PNG_COLOR_TYPE_RGB,
+                 PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
+
+    png_write_info(png_ptr, info_ptr);
+
+    for (int y = 0; y < height_x_width[0]; ++y) {
+      png_write_row(png_ptr, (png_bytep)&rgb_data[y * row_stride]);
+    }
+
+    png_write_end(png_ptr, info_ptr);
+    png_destroy_write_struct(&png_ptr, &info_ptr);
+
+    outbuffer = png_buffer.data();
+    outsize = png_buffer.size();
+  } else {
+    ORTX_CXX_API_THROW("[EncodeImage] Unsupported image format.", ORT_INVALID_ARGUMENT);
   }
 
-  // Setup output & copy to destination
-  std::vector<int64_t> output_dimensions{static_cast<int64_t>(encoded_image.size())};
+  std::vector<int64_t> output_dimensions{static_cast<int64_t>(outsize)};
   uint8_t* data = output.Allocate(output_dimensions);
-  memcpy(data, encoded_image.data(), encoded_image.size());
+  memcpy(data, outbuffer, outsize);
+
+  // The JPEG buffer was malloc-ed by jpeg_mem_dest and must be freed here; PNG bytes are owned by png_buffer.
+  if (outbuffer != png_buffer.data() && outbuffer != nullptr) {
+    free(outbuffer);
+  }
 }
 
 }  // namespace ort_extensions