-
Notifications
You must be signed in to change notification settings - Fork 456
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
T1 & DWT multithreading decoding optimizations #786
Changes from 1 commit
426bf8d
c539808
d8fef96
23a01df
ba1edf6
31882ad
1da397e
93f7f90
956c31d
8371491
107eb31
7092f7e
54179fe
d4b7f03
5fbb8b2
57b216b
e3eb0a2
d67cd22
69497d3
7d3c7a3
4f9abb9
ab22c5b
48c16b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -124,7 +124,7 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_st | |
/** | ||
Inverse wavelet transform in 2-D. | ||
*/ | ||
static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn); | ||
static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn); | ||
|
||
static OPJ_BOOL opj_dwt_encode_procedure( opj_tcd_tilecomp_t * tilec, | ||
void (*p_function)(OPJ_INT32 *, OPJ_INT32,OPJ_INT32,OPJ_INT32) ); | ||
|
@@ -473,8 +473,8 @@ OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec) | |
/* <summary> */ | ||
/* Inverse 5-3 wavelet transform in 2-D. */ | ||
/* </summary> */ | ||
OPJ_BOOL opj_dwt_decode(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) { | ||
return opj_dwt_decode_tile(tilec, numres, &opj_dwt_decode_1); | ||
OPJ_BOOL opj_dwt_decode(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) { | ||
return opj_dwt_decode_tile(tp, tilec, numres, &opj_dwt_decode_1); | ||
} | ||
|
||
|
||
|
@@ -556,10 +556,72 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_U | |
return mr ; | ||
} | ||
|
||
/*
 * Work item describing one horizontal inverse-DWT job. It is consumed, and
 * finally freed, by opj_dwt_decode_h_func.
 */
typedef struct | ||
{ | ||
/* Per-job transform state. h.mem is released by the worker when it is done. */
opj_dwt_t h; | ||
/* 1-D transform routine invoked on &h once per row. */
DWT1DFN dwt_1D; | ||
/* Number of OPJ_INT32 samples copied from h.mem back into each row. */
OPJ_UINT32 rw; | ||
/* Row stride of tiledp, in samples: row j starts at tiledp[j * w]. */
OPJ_UINT32 w; | ||
/* Base pointer of the sample buffer; rows are read from and written back to it. */
OPJ_INT32 * restrict tiledp; | ||
/* First row processed by this job (inclusive). */
int min_j; | ||
/* End of the row range processed by this job (exclusive). */
int max_j; | ||
} opj_dwd_decode_h_job_t; | ||
|
||
/*
 * Thread-pool job callback: runs the horizontal 1-D inverse transform on
 * rows [min_j, max_j) of the job's sample buffer.
 *
 * user_data: pointer to an opj_dwd_decode_h_job_t; this function frees both
 *            job->h.mem and the job structure itself before returning.
 * tls:       unused.
 */
static void opj_dwt_decode_h_func(void* user_data, opj_tls_t* tls) | ||
{ | ||
int j; | ||
opj_dwd_decode_h_job_t* job; | ||
(void)tls; | ||
|
||
job = (opj_dwd_decode_h_job_t*)user_data; | ||
for( j = job->min_j; j < job->max_j; j++ ) | ||
{ | ||
/* NOTE(review): opj_dwt_interleave_h is defined elsewhere; presumably it
   loads row j into job->h.mem in interleaved order - confirm. */
opj_dwt_interleave_h(&job->h, &job->tiledp[j*job->w]); | ||
(job->dwt_1D)(&job->h); | ||
/* Write the first rw transformed samples back over the start of row j. */
memcpy(&job->tiledp[j*job->w], job->h.mem, job->rw * sizeof(OPJ_INT32)); | ||
} | ||
|
||
/* The job owns its scratch buffer and its own descriptor: release both. */
opj_aligned_free(job->h.mem); | ||
opj_free(job); | ||
} | ||
|
||
/*
 * Work item describing one vertical inverse-DWT job. It is consumed, and
 * finally freed, by opj_dwt_decode_v_func.
 */
typedef struct | ||
{ | ||
/* Per-job transform state. v.mem is released by the worker when it is done. */
opj_dwt_t v; | ||
/* 1-D transform routine invoked on &v once per column. */
DWT1DFN dwt_1D; | ||
/* Number of samples copied from v.mem back into each column. */
OPJ_UINT32 rh; | ||
/* Stride between vertically adjacent samples: element (k, j) is tiledp[k * w + j]. */
OPJ_UINT32 w; | ||
/* Base pointer of the sample buffer; columns are read from and written back to it. */
OPJ_INT32 * restrict tiledp; | ||
/* First column processed by this job (inclusive). */
int min_j; | ||
/* End of the column range processed by this job (exclusive). */
int max_j; | ||
} opj_dwd_decode_v_job_t; | ||
|
||
/*
 * Thread-pool job callback: runs the vertical 1-D inverse transform on
 * columns [min_j, max_j) of the job's sample buffer.
 *
 * user_data: pointer to an opj_dwd_decode_v_job_t; this function frees both
 *            job->v.mem and the job structure itself before returning.
 * tls:       unused.
 */
static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) | ||
{ | ||
int j; | ||
opj_dwd_decode_v_job_t* job; | ||
(void)tls; | ||
|
||
job = (opj_dwd_decode_v_job_t*)user_data; | ||
for( j = job->min_j; j < job->max_j; j++ ) | ||
{ | ||
OPJ_UINT32 k; | ||
/* NOTE(review): opj_dwt_interleave_v is defined elsewhere; presumably it
   gathers column j (stride w) into job->v.mem in interleaved order - confirm. */
opj_dwt_interleave_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w); | ||
(job->dwt_1D)(&job->v); | ||
/* Scatter the rh transformed samples back into column j, one row at a time. */
for(k = 0; k < job->rh; ++k) { | ||
job->tiledp[k * job->w + j] = job->v.mem[k]; | ||
} | ||
} | ||
|
||
/* The job owns its scratch buffer and its own descriptor: release both. */
opj_aligned_free(job->v.mem); | ||
opj_free(job); | ||
} | ||
|
||
|
||
/* <summary> */ | ||
/* Inverse wavelet transform in 2-D. */ | ||
/* </summary> */ | ||
static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) { | ||
static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) { | ||
opj_dwt_t h; | ||
opj_dwt_t v; | ||
|
||
|
@@ -569,11 +631,15 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres | |
OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ | ||
|
||
OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); | ||
size_t h_mem_size; | ||
int num_threads; | ||
|
||
if (numres == 1U) { | ||
return OPJ_TRUE; | ||
} | ||
h.mem = (OPJ_INT32*)opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32)); | ||
num_threads = opj_thread_pool_get_thread_count(tp); | ||
h_mem_size = opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32); | ||
h.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); | ||
if (! h.mem){ | ||
/* FIXME event manager error callback */ | ||
return OPJ_FALSE; | ||
|
@@ -595,23 +661,93 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres | |
h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn); | ||
h.cas = tr->x0 % 2; | ||
|
||
for(j = 0; j < rh; ++j) { | ||
opj_dwt_interleave_h(&h, &tiledp[j*w]); | ||
(dwt_1D)(&h); | ||
memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32)); | ||
} | ||
if( num_threads <= 1 || rh == 1 ) | ||
{ | ||
for(j = 0; j < rh; ++j) { | ||
opj_dwt_interleave_h(&h, &tiledp[j*w]); | ||
(dwt_1D)(&h); | ||
memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32)); | ||
} | ||
} | ||
else | ||
{ | ||
int num_jobs = num_threads; | ||
if( rh < num_jobs ) | ||
num_jobs = rh; | ||
for( j = 0; j < num_jobs; j++ ) | ||
{ | ||
opj_dwd_decode_h_job_t* job; | ||
|
||
job = (opj_dwd_decode_h_job_t*) opj_malloc(sizeof(opj_dwd_decode_h_job_t)); | ||
job->h = h; | ||
job->dwt_1D = dwt_1D; | ||
job->rw = rw; | ||
job->w = w; | ||
job->tiledp = tiledp; | ||
job->min_j = j * (rh / num_jobs); | ||
job->max_j = (j+1) * (rh / num_jobs); | ||
if( job->max_j > rh || j == num_jobs - 1 ) | ||
job->max_j = rh; | ||
job->h.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); | ||
if (!job->h.mem) | ||
{ | ||
/* FIXME event manager error callback */ | ||
opj_thread_pool_wait_completion(tp, 0); | ||
opj_free(job); | ||
opj_aligned_free(h.mem); | ||
return OPJ_FALSE; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If the jobs were submitted outside of this loop (i.e. in a separate loop following it), it would be possible to fall back to single-threaded execution in case of an allocation error (and to change the single-thread condition below the loop accordingly). It's very likely that, if an out-of-memory condition is hit here, another one will be raised later, so it's quite arguable whether to do this or not. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah ok, I now get what you meant. Well, on a modern OS, if malloc fails for such small structures you are already in big trouble (swap thrashing, etc.), so a clean error exit is probably good enough, rather than a smarter fallback strategy. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As I said, it was quite arguable. A clean error is probably good enough. |
||
} | ||
opj_thread_pool_submit_job( tp, opj_dwt_decode_h_func, job ); | ||
} | ||
opj_thread_pool_wait_completion(tp, 0); | ||
} | ||
|
||
v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn); | ||
v.cas = tr->y0 % 2; | ||
|
||
for(j = 0; j < rw; ++j){ | ||
OPJ_UINT32 k; | ||
opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w); | ||
(dwt_1D)(&v); | ||
for(k = 0; k < rh; ++k) { | ||
tiledp[k * w + j] = v.mem[k]; | ||
} | ||
} | ||
if( num_threads <= 1 || rw == 1 ) | ||
{ | ||
for(j = 0; j < rw; ++j){ | ||
OPJ_UINT32 k; | ||
opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w); | ||
(dwt_1D)(&v); | ||
for(k = 0; k < rh; ++k) { | ||
tiledp[k * w + j] = v.mem[k]; | ||
} | ||
} | ||
} | ||
else | ||
{ | ||
int num_jobs = num_threads; | ||
if( rw < num_jobs ) | ||
num_jobs = rw; | ||
for( j = 0; j < num_jobs; j++ ) | ||
{ | ||
opj_dwd_decode_v_job_t* job; | ||
|
||
job = (opj_dwd_decode_v_job_t*) opj_malloc(sizeof(opj_dwd_decode_v_job_t)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing allocation check. |
||
job->v = v; | ||
job->dwt_1D = dwt_1D; | ||
job->rh = rh; | ||
job->w = w; | ||
job->tiledp = tiledp; | ||
job->min_j = j * (rw / num_jobs); | ||
job->max_j = (j+1) * (rw / num_jobs); | ||
if( job->max_j > rw || j == num_jobs - 1 ) | ||
job->max_j = rw; | ||
job->v.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); | ||
if (!job->v.mem) | ||
{ | ||
/* FIXME event manager error callback */ | ||
opj_thread_pool_wait_completion(tp, 0); | ||
opj_free(job); | ||
opj_aligned_free(v.mem); | ||
return OPJ_FALSE; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
} | ||
opj_thread_pool_submit_job( tp, opj_dwt_decode_v_func, job ); | ||
} | ||
opj_thread_pool_wait_completion(tp, 0); | ||
} | ||
} | ||
opj_aligned_free(h.mem); | ||
return OPJ_TRUE; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The allocation should be checked for failure.