Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

T1 & DWT multithreading decoding optimizations #786

Merged
merged 23 commits into from
Sep 13, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
426bf8d
Move some MQC functions into a header for speed
c0nk Dec 27, 2015
c539808
opj_t1_updateflags(): tiny optimization
rouault May 21, 2016
d8fef96
Improve code generation in opj_t1_dec_clnpass()
rouault May 21, 2016
23a01df
Specialize decoding passes for 64x64 code blocks
rouault May 21, 2016
ba1edf6
Reduce number of occurrences of orient function argument
rouault May 21, 2016
31882ad
Const'ify lut arrays so they are in the read-only data section
rouault May 21, 2016
1da397e
Tier 1 decoding: add a colflags array
rouault May 22, 2016
93f7f90
opj_t1_decode_cblks(): tiny perf increase when loop unrolling
rouault May 23, 2016
956c31d
opj_t1_dec_clnpass(): remove useless test in the runlen decoding path…
rouault May 23, 2016
8371491
Better inlining of opj_t1_updateflagscolflags() w.r.t. flags_stride
rouault May 23, 2016
107eb31
Improve perf of opj_t1_dec_sigpass_mqc_vsc() and opj_t1_dec_refpass_m…
rouault May 23, 2016
7092f7e
Fix MSVC210 build issue (use of C99 declaration after statement) intr…
rouault May 23, 2016
54179fe
Add threading and thread pool API
rouault May 25, 2016
d4b7f03
Add opj_codec_set_threads() in public API and propagate resulting thr…
rouault May 25, 2016
5fbb8b2
Use thread-pool for T1 decoding
rouault May 25, 2016
57b216b
Use thread pool for DWT decoding
rouault May 25, 2016
e3eb0a2
.travis.yml: add a conf with OPJ_NUM_THREADS=2
rouault May 25, 2016
d67cd22
opj_decompress: add a -threads <num_threads> option
rouault May 25, 2016
69497d3
opj_decompress: use clock_gettime() instead of getrusage() so as to g…
rouault May 25, 2016
7d3c7a3
Be robust to failed allocations of job structures
rouault May 26, 2016
4f9abb9
[Win32] Use _beginthreadex instead of CreateThread()
rouault Aug 11, 2016
ab22c5b
opj_thread_pool: fix potential deadlock at thread pool destruction
rouault Sep 8, 2016
48c16b2
Merge branch 'master' of https://github.com/uclouvain/openjpeg into t…
rouault Sep 8, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ matrix:
- os: linux
compiler: gcc
env: OPJ_CI_ARCH=x86_64 OPJ_CI_BUILD_CONFIGURATION=Release OPJ_CI_INCLUDE_IF_DEPLOY=1
- os: linux
compiler: gcc
env: OPJ_CI_ARCH=x86_64 OPJ_CI_BUILD_CONFIGURATION=Release OPJ_NUM_THREADS=2
- os: linux
compiler: gcc
env: OPJ_CI_ARCH=i386 OPJ_CI_BUILD_CONFIGURATION=Release
Expand Down
3 changes: 3 additions & 0 deletions src/bin/jp2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ foreach(exe opj_decompress opj_compress opj_dump)
# On unix you need to link to the math library:
if(UNIX)
  if("${CMAKE_SYSTEM_NAME}" MATCHES "Linux")
    # Linux builds additionally link librt after libm.
    # NOTE(review): presumably needed for clock_gettime() on older glibc -- confirm.
    target_link_libraries(${exe} m rt)
  else()
    target_link_libraries(${exe} m)
  endif()
endif()
# Install exe
install(TARGETS ${exe}
Expand Down
60 changes: 47 additions & 13 deletions src/bin/jp2/opj_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

#ifdef _WIN32
#include "windirent.h"
Expand Down Expand Up @@ -150,6 +151,8 @@ typedef struct opj_decompress_params
int upsample;
/* split output components to different files */
int split_pnm;
/** number of threads */
int num_threads;
}opj_decompress_parameters;

/* -------------------------------------------------------------------------- */
Expand Down Expand Up @@ -224,8 +227,11 @@ static void decode_help_display(void) {
" -upsample\n"
" Downsampled components will be upsampled to image size\n"
" -split-pnm\n"
" Split output components to different files when writing to PNM\n"
"\n");
" Split output components to different files when writing to PNM\n");
if( opj_has_thread_support() ) {
fprintf(stdout," -threads <num_threads>\n"
" Number of threads to use for decoding.\n");
}
/* UniPG>> */
#ifdef USE_JPWL
fprintf(stdout," -W <options>\n"
Expand Down Expand Up @@ -520,7 +526,8 @@ int parse_cmdline_decoder(int argc, char **argv, opj_decompress_parameters *para
{"OutFor", REQ_ARG, NULL,'O'},
{"force-rgb", NO_ARG, NULL, 1},
{"upsample", NO_ARG, NULL, 1},
{"split-pnm", NO_ARG, NULL, 1}
{"split-pnm", NO_ARG, NULL, 1},
{"threads", REQ_ARG, NULL, 'T'}
};

const char optlist[] = "i:o:r:l:x:d:t:p:"
Expand Down Expand Up @@ -808,6 +815,22 @@ int parse_cmdline_decoder(int argc, char **argv, opj_decompress_parameters *para
break;
#endif /* USE_JPWL */
/* <<UniPG */

/* ----------------------------------------------------- */
case 'T': /* Number of threads */
{
    if( strcmp(opj_optarg, "ALL_CPUS") == 0 )
    {
        /* Use the CPU count reported by opj_get_num_cpus(); a count of 1 is stored as 0. */
        parameters->num_threads = opj_get_num_cpus();
        if( parameters->num_threads == 1 )
            parameters->num_threads = 0;
    }
    else if( sscanf(opj_optarg, "%d", &parameters->num_threads) != 1 )
    {
        /* sscanf() stores nothing when the argument is not an integer, which
         * would silently keep whatever value num_threads held before. Report
         * the problem and fall back to 0; main() only calls
         * opj_codec_set_threads() for values >= 1. */
        fprintf(stderr, "WARNING -> opj_decompress: invalid value '%s' for -threads, threading not enabled\n", opj_optarg);
        parameters->num_threads = 0;
    }
}
break;

/* ----------------------------------------------------- */

Expand Down Expand Up @@ -885,17 +908,22 @@ OPJ_FLOAT64 opj_clock(void) {
/* t is the high resolution performance counter (see MSDN) */
QueryPerformanceCounter ( & t ) ;
return freq.QuadPart ? (t.QuadPart / (OPJ_FLOAT64)freq.QuadPart) : 0;
#elif defined(__linux)
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
return( ts.tv_sec + ts.tv_nsec * 1e-9 );
#else
/* Unix or Linux: use resource usage */
struct rusage t;
OPJ_FLOAT64 procTime;
/* (1) Get the rusage data structure at this moment (man getrusage) */
getrusage(0,&t);
/* (2) What is the elapsed time ? - CPU time = User time + System time */
/* Unix : use resource usage */
/* FIXME: this counts the total CPU time, instead of the user perceived time */
struct rusage t;
OPJ_FLOAT64 procTime;
/* (1) Get the rusage data structure at this moment (man getrusage) */
getrusage(0,&t);
/* (2) What is the elapsed time ? - CPU time = User time + System time */
/* (2a) Get the seconds */
procTime = (OPJ_FLOAT64)(t.ru_utime.tv_sec + t.ru_stime.tv_sec);
/* (2b) More precisely! Get the microseconds part ! */
return ( procTime + (OPJ_FLOAT64)(t.ru_utime.tv_usec + t.ru_stime.tv_usec) * 1e-6 ) ;
procTime = (OPJ_FLOAT64)(t.ru_utime.tv_sec + t.ru_stime.tv_sec);
/* (2b) More precisely! Get the microseconds part ! */
return ( procTime + (OPJ_FLOAT64)(t.ru_utime.tv_usec + t.ru_stime.tv_usec) * 1e-6 ) ;
#endif
}

Expand Down Expand Up @@ -1306,7 +1334,13 @@ int main(int argc, char **argv)
opj_destroy_codec(l_codec);
failed = 1; goto fin;
}


/* Configure the codec's thread count only when at least one thread was requested. */
if( parameters.num_threads >= 1 ) {
    if( !opj_codec_set_threads(l_codec, parameters.num_threads) ) {
        fprintf(stderr, "ERROR -> opj_decompress: failed to set number of threads\n");
        /* Release the stream and the codec before jumping to the fin label. */
        opj_stream_destroy(l_stream);
        opj_destroy_codec(l_codec);
        failed = 1;
        goto fin;
    }
}

/* Read the main header of the codestream and if necessary the JP2 boxes*/
if(! opj_read_header(l_stream, l_codec, &image)){
Expand Down
41 changes: 41 additions & 0 deletions src/lib/openjp2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ include_directories(
)
# Defines the source code for the library
set(OPENJPEG_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/thread.c
${CMAKE_CURRENT_SOURCE_DIR}/thread.h
${CMAKE_CURRENT_SOURCE_DIR}/bio.c
${CMAKE_CURRENT_SOURCE_DIR}/bio.h
${CMAKE_CURRENT_SOURCE_DIR}/cio.c
Expand All @@ -29,6 +31,7 @@ set(OPENJPEG_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/mct.h
${CMAKE_CURRENT_SOURCE_DIR}/mqc.c
${CMAKE_CURRENT_SOURCE_DIR}/mqc.h
${CMAKE_CURRENT_SOURCE_DIR}/mqc_inl.h
${CMAKE_CURRENT_SOURCE_DIR}/openjpeg.c
${CMAKE_CURRENT_SOURCE_DIR}/openjpeg.h
${CMAKE_CURRENT_SOURCE_DIR}/opj_clock.c
Expand Down Expand Up @@ -73,6 +76,11 @@ if(OPJ_DISABLE_TPSOT_FIX)
add_definitions(-DOPJ_DISABLE_TPSOT_FIX)
endif()

# Special case for old i586-mingw32msvc-gcc cross compiler:
# if WIN32 is not set, but the C compiler is GNU and its path contains
# "mingw32msvc", force WIN32 on so the if(WIN32) checks below are taken.
if(CMAKE_COMPILER_IS_GNUCC AND NOT WIN32)
  if(CMAKE_C_COMPILER MATCHES "mingw32msvc")
    set(WIN32 YES)
  endif()
endif()

# Build the library
if(WIN32)
if(BUILD_SHARED_LIBS)
Expand Down Expand Up @@ -142,3 +150,36 @@ if(OPJ_USE_DSYMUTIL)
DEPENDS ${OPENJPEG_LIBRARY_NAME})
endif()
endif()

#################################################################################
# threading configuration
#
# Selects the mutex backend through exactly one kind of definition
# (MUTEX_stub, MUTEX_win32 or MUTEX_pthread) and links the thread library
# when the pthreads backend is used.
#################################################################################
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)

option(USE_THREAD "Build with thread/mutex support " ON)

find_package(Threads QUIET)

if(NOT USE_THREAD)
  # Thread support switched off by the user.
  add_definitions(-DMUTEX_stub)
else()
  if(WIN32 AND NOT Threads_FOUND)
    # Nothing was found by find_package(Threads) on a WIN32 build:
    # select the win32 backend and treat threads as found.
    add_definitions(-DMUTEX_win32)
    set(Threads_FOUND YES)
  endif()

  if(NOT Threads_FOUND)
    message(FATAL_ERROR "No thread library found and thread/mutex support is required by USE_THREAD option")
  endif()

  if(CMAKE_USE_WIN32_THREADS_INIT)
    add_definitions(-DMUTEX_win32)
  endif()

  if(CMAKE_USE_PTHREADS_INIT)
    add_definitions(-DMUTEX_pthread)
    # Plain (keyword-less) signature kept on purpose: it must not be mixed
    # with the PRIVATE/PUBLIC signature on the same target.
    target_link_libraries(${OPENJPEG_LIBRARY_NAME} ${CMAKE_THREAD_LIBS_INIT})
  endif()
endif()
Loading