Update openjpeg with alchemy changes
This commit is contained in:
@@ -26,6 +26,7 @@ set(openjpeg_SOURCE_FILES
|
||||
mct.c
|
||||
mqc.c
|
||||
openjpeg.c
|
||||
opj_malloc.c
|
||||
phix_manager.c
|
||||
pi.c
|
||||
ppix_manager.c
|
||||
|
||||
@@ -31,11 +31,16 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define OPJ_SKIP_POISON
|
||||
#include "opj_includes.h"
|
||||
|
||||
#ifdef __SSE__
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "opj_includes.h"
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC poison malloc calloc realloc free
|
||||
#endif
|
||||
|
||||
/** @defgroup DWT DWT - Implementation of a discrete wavelet transform */
|
||||
/*@{*/
|
||||
@@ -499,7 +504,7 @@ void dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, int prec) {
|
||||
/* <summary> */
|
||||
/* Determine maximum computed resolution level for inverse wavelet transform */
|
||||
/* </summary> */
|
||||
static int dwt_decode_max_resolution(opj_tcd_resolution_t* restrict r, int i) {
|
||||
static int dwt_decode_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, int i) {
|
||||
int mr = 1;
|
||||
int w;
|
||||
while( --i ) {
|
||||
@@ -531,7 +536,7 @@ static void dwt_decode_tile(opj_tcd_tilecomp_t* tilec, int numres, DWT1DFN dwt_1
|
||||
v.mem = h.mem;
|
||||
|
||||
while( --numres) {
|
||||
int * restrict tiledp = tilec->data;
|
||||
int * OPJ_RESTRICT tiledp = tilec->data;
|
||||
int j;
|
||||
|
||||
++tr;
|
||||
@@ -565,48 +570,49 @@ static void dwt_decode_tile(opj_tcd_tilecomp_t* tilec, int numres, DWT1DFN dwt_1
|
||||
opj_aligned_free(h.mem);
|
||||
}
|
||||
|
||||
static void v4dwt_interleave_h(v4dwt_t* restrict w, float* restrict a, int x, int size){
|
||||
float* restrict bi = (float*) (w->wavelet + w->cas);
|
||||
static void v4dwt_interleave_h(v4dwt_t* OPJ_RESTRICT w, float* OPJ_RESTRICT a, int x, int size) {
|
||||
float* OPJ_RESTRICT bi = (float*)(w->wavelet + w->cas);
|
||||
int count = w->sn;
|
||||
int i, k;
|
||||
for(k = 0; k < 2; ++k){
|
||||
if (count + 3 * x < size && ((size_t) a & 0x0f) == 0 && ((size_t) bi & 0x0f) == 0 && (x & 0x0f) == 0) {
|
||||
for (k = 0; k < 2; ++k) {
|
||||
if (count + 3 * x < size && ((size_t)a & 0x0f) == 0 && ((size_t)bi & 0x0f) == 0 && (x & 0x0f) == 0) {
|
||||
/* Fast code path */
|
||||
for(i = 0; i < count; ++i){
|
||||
for (i = 0; i < count; ++i) {
|
||||
int j = i;
|
||||
bi[i*8 ] = a[j];
|
||||
bi[i * 8] = a[j];
|
||||
j += x;
|
||||
bi[i*8 + 1] = a[j];
|
||||
bi[i * 8 + 1] = a[j];
|
||||
j += x;
|
||||
bi[i*8 + 2] = a[j];
|
||||
bi[i * 8 + 2] = a[j];
|
||||
j += x;
|
||||
bi[i*8 + 3] = a[j];
|
||||
bi[i * 8 + 3] = a[j];
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
/* Slow code path */
|
||||
for(i = 0; i < count; ++i){
|
||||
int j = i;
|
||||
bi[i*8 ] = a[j];
|
||||
j += x;
|
||||
if(j > size) continue;
|
||||
bi[i*8 + 1] = a[j];
|
||||
j += x;
|
||||
if(j > size) continue;
|
||||
bi[i*8 + 2] = a[j];
|
||||
j += x;
|
||||
if(j > size) continue;
|
||||
bi[i*8 + 3] = a[j];
|
||||
for (i = 0; i < count; ++i) {
|
||||
int j = i;
|
||||
bi[i * 8] = a[j];
|
||||
j += x;
|
||||
if (j > size) continue;
|
||||
bi[i * 8 + 1] = a[j];
|
||||
j += x;
|
||||
if (j > size) continue;
|
||||
bi[i * 8 + 2] = a[j];
|
||||
j += x;
|
||||
if (j > size) continue;
|
||||
bi[i * 8 + 3] = a[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
bi = (float*) (w->wavelet + 1 - w->cas);
|
||||
bi = (float*)(w->wavelet + 1 - w->cas);
|
||||
a += w->sn;
|
||||
size -= w->sn;
|
||||
count = w->dn;
|
||||
}
|
||||
}
|
||||
|
||||
static void v4dwt_interleave_v(v4dwt_t* restrict v , float* restrict a , int x){
|
||||
v4* restrict bi = v->wavelet + v->cas;
|
||||
static void v4dwt_interleave_v(v4dwt_t* OPJ_RESTRICT v , float* OPJ_RESTRICT a , int x){
|
||||
v4* OPJ_RESTRICT bi = v->wavelet + v->cas;
|
||||
int i;
|
||||
for(i = 0; i < v->sn; ++i){
|
||||
memcpy(&bi[i*2], &a[i*x], 4 * sizeof(float));
|
||||
@@ -621,7 +627,7 @@ static void v4dwt_interleave_v(v4dwt_t* restrict v , float* restrict a , int x){
|
||||
#ifdef __SSE__
|
||||
|
||||
static void v4dwt_decode_step1_sse(v4* w, int count, const __m128 c){
|
||||
__m128* restrict vw = (__m128*) w;
|
||||
__m128* OPJ_RESTRICT vw = (__m128*) w;
|
||||
int i;
|
||||
/* 4x unrolled loop */
|
||||
for(i = 0; i < count >> 2; ++i){
|
||||
@@ -642,22 +648,39 @@ static void v4dwt_decode_step1_sse(v4* w, int count, const __m128 c){
|
||||
}
|
||||
|
||||
static void v4dwt_decode_step2_sse(v4* l, v4* w, int k, int m, __m128 c){
|
||||
__m128* restrict vl = (__m128*) l;
|
||||
__m128* restrict vw = (__m128*) w;
|
||||
__m128* OPJ_RESTRICT vl = (__m128*) l;
|
||||
__m128* OPJ_RESTRICT vw = (__m128*) w;
|
||||
int i;
|
||||
__m128 tmp1, tmp2, tmp3;
|
||||
tmp1 = vl[0];
|
||||
for(i = 0; i < m; ++i){
|
||||
for (i = 0; i < m - 3; i += 4) {
|
||||
__m128 tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
|
||||
tmp2 = vw[-1];
|
||||
tmp3 = vw[0];
|
||||
tmp4 = vw[1];
|
||||
tmp5 = vw[2];
|
||||
tmp6 = vw[3];
|
||||
tmp7 = vw[4];
|
||||
tmp8 = vw[5];
|
||||
tmp9 = vw[6];
|
||||
vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c));
|
||||
vw[1] = _mm_add_ps(tmp4, _mm_mul_ps(_mm_add_ps(tmp3, tmp5), c));
|
||||
vw[3] = _mm_add_ps(tmp6, _mm_mul_ps(_mm_add_ps(tmp5, tmp7), c));
|
||||
vw[5] = _mm_add_ps(tmp8, _mm_mul_ps(_mm_add_ps(tmp7, tmp9), c));
|
||||
tmp1 = tmp9;
|
||||
vw += 8;
|
||||
}
|
||||
for ( ; i < m; ++i) {
|
||||
tmp2 = vw[-1];
|
||||
tmp3 = vw[ 0];
|
||||
vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c));
|
||||
tmp1 = tmp3;
|
||||
vw += 2;
|
||||
}
|
||||
vl = vw - 2;
|
||||
if(m >= k){
|
||||
return;
|
||||
}
|
||||
vl = vw - 2;
|
||||
c = _mm_add_ps(c, c);
|
||||
c = _mm_mul_ps(c, vl[0]);
|
||||
for(; m < k; ++m){
|
||||
@@ -670,7 +693,7 @@ static void v4dwt_decode_step2_sse(v4* l, v4* w, int k, int m, __m128 c){
|
||||
#else
|
||||
|
||||
static void v4dwt_decode_step1(v4* w, int count, const float c){
|
||||
float* restrict fw = (float*) w;
|
||||
float* OPJ_RESTRICT fw = (float*) w;
|
||||
int i;
|
||||
for(i = 0; i < count; ++i){
|
||||
float tmp1 = fw[i*8 ];
|
||||
@@ -685,8 +708,8 @@ static void v4dwt_decode_step1(v4* w, int count, const float c){
|
||||
}
|
||||
|
||||
static void v4dwt_decode_step2(v4* l, v4* w, int k, int m, float c){
|
||||
float* restrict fl = (float*) l;
|
||||
float* restrict fw = (float*) w;
|
||||
float* OPJ_RESTRICT fl = (float*) l;
|
||||
float* OPJ_RESTRICT fw = (float*) w;
|
||||
int i;
|
||||
for(i = 0; i < m; ++i){
|
||||
float tmp1_1 = fl[0];
|
||||
@@ -737,42 +760,44 @@ static void v4dwt_decode_step2(v4* l, v4* w, int k, int m, float c){
|
||||
/* <summary> */
|
||||
/* Inverse 9-7 wavelet transform in 1-D. */
|
||||
/* </summary> */
|
||||
static void v4dwt_decode(v4dwt_t* OPJ_RESTRICT dwt){
	/* Parity offsets into dwt->wavelet: cas == 0 selects (a, b) = (0, 1),
	 * any other cas selects (1, 0). */
	const int a = (dwt->cas == 0) ? 0 : 1;
	const int b = 1 - a;
	/* Lengths of the band at offset a and of the band at offset b. */
	const int len_a = (a == 0) ? dwt->sn : dwt->dn;
	const int len_b = (a == 0) ? dwt->dn : dwt->sn;
	v4* OPJ_RESTRICT waveleta;
	v4* OPJ_RESTRICT waveletb;

	/* Nothing to do when the b band is empty and the a band has at most one entry. */
	if (len_b <= 0 && len_a <= 1) {
		return;
	}

	waveleta = dwt->wavelet + a;
	waveletb = dwt->wavelet + b;

	/* Two scaling passes (K, c13318) followed by four lifting passes
	 * (delta, gamma, beta, alpha), alternating between the two bands. */
#ifdef __SSE__
	v4dwt_decode_step1_sse(waveleta, dwt->sn, _mm_set1_ps(K));
	v4dwt_decode_step1_sse(waveletb, dwt->dn, _mm_set1_ps(c13318));
	v4dwt_decode_step2_sse(waveletb, waveleta + 1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(dwt_delta));
	v4dwt_decode_step2_sse(waveleta, waveletb + 1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(dwt_gamma));
	v4dwt_decode_step2_sse(waveletb, waveleta + 1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(dwt_beta));
	v4dwt_decode_step2_sse(waveleta, waveletb + 1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(dwt_alpha));
#else
	v4dwt_decode_step1(waveleta, dwt->sn, K);
	v4dwt_decode_step1(waveletb, dwt->dn, c13318);
	v4dwt_decode_step2(waveletb, waveleta + 1, dwt->sn, int_min(dwt->sn, dwt->dn-a), dwt_delta);
	v4dwt_decode_step2(waveleta, waveletb + 1, dwt->dn, int_min(dwt->dn, dwt->sn-b), dwt_gamma);
	v4dwt_decode_step2(waveletb, waveleta + 1, dwt->sn, int_min(dwt->sn, dwt->dn-a), dwt_beta);
	v4dwt_decode_step2(waveleta, waveletb + 1, dwt->dn, int_min(dwt->dn, dwt->sn-b), dwt_alpha);
#endif
}
|
||||
|
||||
/* <summary> */
|
||||
/* Inverse 9-7 wavelet transform in 2-D. */
|
||||
/* </summary> */
|
||||
void dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, int numres){
|
||||
void dwt_decode_real(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, int numres){
|
||||
v4dwt_t h;
|
||||
v4dwt_t v;
|
||||
|
||||
@@ -787,7 +812,7 @@ void dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, int numres){
|
||||
v.wavelet = h.wavelet;
|
||||
|
||||
while( --numres) {
|
||||
float * restrict aj = (float*) tilec->data;
|
||||
float * OPJ_RESTRICT aj = (float*) tilec->data;
|
||||
int bufsize = (tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0);
|
||||
int j;
|
||||
|
||||
|
||||
@@ -29,11 +29,16 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define OPJ_SKIP_POISON
|
||||
#include "opj_includes.h"
|
||||
|
||||
#ifdef __SSE__
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "opj_includes.h"
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC poison malloc calloc realloc free
|
||||
#endif
|
||||
|
||||
/* <summary> */
|
||||
/* This table contains the norms of the basis function of the reversible MCT. */
|
||||
@@ -49,17 +54,38 @@ static const double mct_norms_real[3] = { 1.732, 1.805, 1.573 };
|
||||
/* Forward reversible MCT. */
|
||||
/* </summary> */
|
||||
void mct_encode(
|
||||
int* restrict c0,
|
||||
int* restrict c1,
|
||||
int* restrict c2,
|
||||
int* OPJ_RESTRICT c0,
|
||||
int* OPJ_RESTRICT c1,
|
||||
int* OPJ_RESTRICT c2,
|
||||
int n)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i) {
|
||||
int i = 0;
|
||||
#ifdef __SSE2__
|
||||
/* Buffers are normally aligned on 16 bytes... */
|
||||
if (((size_t)c0 & 0xf) == 0 && ((size_t)c1 & 0xf) == 0 && ((size_t)c2 & 0xf) == 0) {
|
||||
const int cnt = n & ~3U;
|
||||
for (; i < cnt; i += 4) {
|
||||
__m128i y, u, v;
|
||||
__m128i r = _mm_load_si128((const __m128i*) & (c0[i]));
|
||||
__m128i g = _mm_load_si128((const __m128i*) & (c1[i]));
|
||||
__m128i b = _mm_load_si128((const __m128i*) & (c2[i]));
|
||||
y = _mm_add_epi32(g, g);
|
||||
y = _mm_add_epi32(y, b);
|
||||
y = _mm_add_epi32(y, r);
|
||||
y = _mm_srai_epi32(y, 2);
|
||||
u = _mm_sub_epi32(b, g);
|
||||
v = _mm_sub_epi32(r, g);
|
||||
_mm_store_si128((__m128i*) & (c0[i]), y);
|
||||
_mm_store_si128((__m128i*) & (c1[i]), u);
|
||||
_mm_store_si128((__m128i*) & (c2[i]), v);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (; i < n; ++i) {
|
||||
int r = c0[i];
|
||||
int g = c1[i];
|
||||
int b = c2[i];
|
||||
int y = (r + (g * 2) + b) >> 2;
|
||||
int y = (r + g + g + b) >> 2;
|
||||
int u = b - g;
|
||||
int v = r - g;
|
||||
c0[i] = y;
|
||||
@@ -72,13 +98,32 @@ void mct_encode(
|
||||
/* Inverse reversible MCT. */
|
||||
/* </summary> */
|
||||
void mct_decode(
|
||||
int* restrict c0,
|
||||
int* restrict c1,
|
||||
int* restrict c2,
|
||||
int* OPJ_RESTRICT c0,
|
||||
int* OPJ_RESTRICT c1,
|
||||
int* OPJ_RESTRICT c2,
|
||||
int n)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < n; ++i) {
|
||||
int i = 0;
|
||||
#ifdef __SSE2__
|
||||
/* Buffers are normally aligned on 16 bytes... */
|
||||
if (((size_t)c0 & 0xf) == 0 && ((size_t)c1 & 0xf) == 0 && ((size_t)c2 & 0xf) == 0) {
|
||||
const int cnt = n & ~3U;
|
||||
for (; i < cnt; i += 4) {
|
||||
__m128i r, g, b;
|
||||
__m128i y = _mm_load_si128((const __m128i*) & (c0[i]));
|
||||
__m128i u = _mm_load_si128((const __m128i*) & (c1[i]));
|
||||
__m128i v = _mm_load_si128((const __m128i*) & (c2[i]));
|
||||
g = y;
|
||||
g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2));
|
||||
r = _mm_add_epi32(v, g);
|
||||
b = _mm_add_epi32(u, g);
|
||||
_mm_store_si128((__m128i*) & (c0[i]), r);
|
||||
_mm_store_si128((__m128i*) & (c1[i]), g);
|
||||
_mm_store_si128((__m128i*) & (c2[i]), b);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (; i < n; ++i) {
|
||||
int y = c0[i];
|
||||
int u = c1[i];
|
||||
int v = c2[i];
|
||||
@@ -102,13 +147,119 @@ double mct_getnorm(int compno) {
|
||||
/* Forward irreversible MCT. */
|
||||
/* </summary> */
|
||||
void mct_encode_real(
|
||||
int* restrict c0,
|
||||
int* restrict c1,
|
||||
int* restrict c2,
|
||||
int* OPJ_RESTRICT c0,
|
||||
int* OPJ_RESTRICT c1,
|
||||
int* OPJ_RESTRICT c2,
|
||||
int n)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i) {
|
||||
int i = 0;
|
||||
#ifdef __SSE4_1__
|
||||
/* Buffers are normally aligned on 16 bytes... */
|
||||
if (((size_t)c0 & 0xf) == 0 && ((size_t)c1 & 0xf) == 0 && ((size_t)c2 & 0xf) == 0) {
|
||||
const int cnt = n & ~3U;
|
||||
const __m128i ry = _mm_set1_epi32(2449);
|
||||
const __m128i gy = _mm_set1_epi32(4809);
|
||||
const __m128i by = _mm_set1_epi32(934);
|
||||
const __m128i ru = _mm_set1_epi32(1382);
|
||||
const __m128i gu = _mm_set1_epi32(2714);
|
||||
const __m128i gv = _mm_set1_epi32(3430);
|
||||
const __m128i bv = _mm_set1_epi32(666);
|
||||
const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096), _MM_SHUFFLE(1, 0, 1, 0));
|
||||
for (; i < cnt; i += 4) {
|
||||
__m128i lo, hi, y, u, v;
|
||||
__m128i r = _mm_load_si128((const __m128i*) & (c0[i]));
|
||||
__m128i g = _mm_load_si128((const __m128i*) & (c1[i]));
|
||||
__m128i b = _mm_load_si128((const __m128i*) & (c2[i]));
|
||||
|
||||
hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
lo = _mm_mul_epi32(r, ry);
|
||||
hi = _mm_mul_epi32(hi, ry);
|
||||
lo = _mm_add_epi64(lo, mulround);
|
||||
hi = _mm_add_epi64(hi, mulround);
|
||||
lo = _mm_srli_epi64(lo, 13);
|
||||
hi = _mm_slli_epi64(hi, 32 - 13);
|
||||
y = _mm_blend_epi16(lo, hi, 0xCC);
|
||||
|
||||
hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
lo = _mm_mul_epi32(g, gy);
|
||||
hi = _mm_mul_epi32(hi, gy);
|
||||
lo = _mm_add_epi64(lo, mulround);
|
||||
hi = _mm_add_epi64(hi, mulround);
|
||||
lo = _mm_srli_epi64(lo, 13);
|
||||
hi = _mm_slli_epi64(hi, 32 - 13);
|
||||
y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
|
||||
|
||||
hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
lo = _mm_mul_epi32(b, by);
|
||||
hi = _mm_mul_epi32(hi, by);
|
||||
lo = _mm_add_epi64(lo, mulround);
|
||||
hi = _mm_add_epi64(hi, mulround);
|
||||
lo = _mm_srli_epi64(lo, 13);
|
||||
hi = _mm_slli_epi64(hi, 32 - 13);
|
||||
y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
|
||||
_mm_store_si128((__m128i*) & (c0[i]), y);
|
||||
|
||||
lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
|
||||
hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
|
||||
lo = _mm_slli_epi64(lo, 12);
|
||||
hi = _mm_slli_epi64(hi, 12);
|
||||
lo = _mm_add_epi64(lo, mulround);
|
||||
hi = _mm_add_epi64(hi, mulround);
|
||||
lo = _mm_srli_epi64(lo, 13);
|
||||
hi = _mm_slli_epi64(hi, 32 - 13);
|
||||
u = _mm_blend_epi16(lo, hi, 0xCC);
|
||||
|
||||
hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
lo = _mm_mul_epi32(r, ru);
|
||||
hi = _mm_mul_epi32(hi, ru);
|
||||
lo = _mm_add_epi64(lo, mulround);
|
||||
hi = _mm_add_epi64(hi, mulround);
|
||||
lo = _mm_srli_epi64(lo, 13);
|
||||
hi = _mm_slli_epi64(hi, 32 - 13);
|
||||
u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
|
||||
|
||||
hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
lo = _mm_mul_epi32(g, gu);
|
||||
hi = _mm_mul_epi32(hi, gu);
|
||||
lo = _mm_add_epi64(lo, mulround);
|
||||
hi = _mm_add_epi64(hi, mulround);
|
||||
lo = _mm_srli_epi64(lo, 13);
|
||||
hi = _mm_slli_epi64(hi, 32 - 13);
|
||||
u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
|
||||
_mm_store_si128((__m128i*) & (c1[i]), u);
|
||||
|
||||
lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
|
||||
hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
|
||||
lo = _mm_slli_epi64(lo, 12);
|
||||
hi = _mm_slli_epi64(hi, 12);
|
||||
lo = _mm_add_epi64(lo, mulround);
|
||||
hi = _mm_add_epi64(hi, mulround);
|
||||
lo = _mm_srli_epi64(lo, 13);
|
||||
hi = _mm_slli_epi64(hi, 32 - 13);
|
||||
v = _mm_blend_epi16(lo, hi, 0xCC);
|
||||
|
||||
hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
lo = _mm_mul_epi32(g, gv);
|
||||
hi = _mm_mul_epi32(hi, gv);
|
||||
lo = _mm_add_epi64(lo, mulround);
|
||||
hi = _mm_add_epi64(hi, mulround);
|
||||
lo = _mm_srli_epi64(lo, 13);
|
||||
hi = _mm_slli_epi64(hi, 32 - 13);
|
||||
v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
|
||||
|
||||
hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
lo = _mm_mul_epi32(b, bv);
|
||||
hi = _mm_mul_epi32(hi, bv);
|
||||
lo = _mm_add_epi64(lo, mulround);
|
||||
hi = _mm_add_epi64(hi, mulround);
|
||||
lo = _mm_srli_epi64(lo, 13);
|
||||
hi = _mm_slli_epi64(hi, 32 - 13);
|
||||
v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
|
||||
_mm_store_si128((__m128i*) & (c2[i]), v);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (; i < n; ++i) {
|
||||
int r = c0[i];
|
||||
int g = c1[i];
|
||||
int b = c2[i];
|
||||
@@ -125,19 +276,21 @@ void mct_encode_real(
|
||||
/* Inverse irreversible MCT. */
|
||||
/* </summary> */
|
||||
void mct_decode_real(
|
||||
float* restrict c0,
|
||||
float* restrict c1,
|
||||
float* restrict c2,
|
||||
float* OPJ_RESTRICT c0,
|
||||
float* OPJ_RESTRICT c1,
|
||||
float* OPJ_RESTRICT c2,
|
||||
int n)
|
||||
{
|
||||
int i;
|
||||
#ifdef __SSE__
|
||||
int count;
|
||||
__m128 vrv, vgu, vgv, vbu;
|
||||
vrv = _mm_set1_ps(1.402f);
|
||||
vgu = _mm_set1_ps(0.34413f);
|
||||
vgv = _mm_set1_ps(0.71414f);
|
||||
vbu = _mm_set1_ps(1.772f);
|
||||
for (i = 0; i < (n >> 3); ++i) {
|
||||
count = n >> 3;
|
||||
for (i = 0; i < count; ++i) {
|
||||
__m128 vy, vu, vv;
|
||||
__m128 vr, vg, vb;
|
||||
|
||||
@@ -174,7 +327,7 @@ void mct_decode_real(
|
||||
float u = c1[i];
|
||||
float v = c2[i];
|
||||
float r = y + (v * 1.402f);
|
||||
float g = y - (u * 0.34413f) - (v * (0.71414f));
|
||||
float g = y - (u * 0.34413f) - (v * 0.71414f);
|
||||
float b = y + (u * 1.772f);
|
||||
c0[i] = r;
|
||||
c1[i] = g;
|
||||
|
||||
@@ -40,33 +40,71 @@
|
||||
==========================================================
|
||||
*/
|
||||
|
||||
/*
|
||||
The inline keyword is supported by C99 but not by C90.
|
||||
Most compilers implement their own version of this keyword ...
|
||||
*/
|
||||
#ifndef INLINE
|
||||
#if defined(_MSC_VER)
|
||||
#define INLINE __forceinline
|
||||
#elif defined(__GNUC__)
|
||||
#define INLINE __inline__
|
||||
#elif defined(__MWERKS__)
|
||||
#define INLINE inline
|
||||
#else
|
||||
/* add other compilers here ... */
|
||||
#define INLINE
|
||||
#endif /* defined(<Compiler>) */
|
||||
#endif /* INLINE */
|
||||
#if defined(OPJ_STATIC) || !defined(_WIN32)
|
||||
#define OPJ_API
|
||||
#define OPJ_CALLCONV
|
||||
#else
|
||||
#define OPJ_CALLCONV __stdcall
|
||||
/*
|
||||
The following ifdef block is the standard way of creating macros which make exporting
|
||||
The following ifdef block is the standard way of creating macros which make exporting
|
||||
from a DLL simpler. All files within this DLL are compiled with the OPJ_EXPORTS
|
||||
symbol defined on the command line. This symbol should not be defined in any project
|
||||
that uses this DLL. This way any other project whose source files include this file see
|
||||
OPJ_API functions as being imported from a DLL, wheras this DLL sees symbols
|
||||
that uses this DLL. This way any other project whose source files include this file see
|
||||
OPJ_API functions as being imported from a DLL, whereas this DLL sees symbols
|
||||
defined with this macro as being exported.
|
||||
*/
|
||||
#if defined(OPJ_EXPORTS) || defined(DLL_EXPORT)
|
||||
#define OPJ_API __declspec(dllexport)
|
||||
#else
|
||||
#define OPJ_API __declspec(dllimport)
|
||||
#endif /* OPJ_EXPORTS */
|
||||
# if defined(OPJ_EXPORTS) || defined(DLL_EXPORT)
|
||||
# define OPJ_API __declspec(dllexport)
|
||||
# else
|
||||
# define OPJ_API __declspec(dllimport)
|
||||
# endif /* OPJ_EXPORTS */
|
||||
#endif /* !OPJ_STATIC || !_WIN32 */
|
||||
|
||||
typedef int opj_bool;
|
||||
#define OPJ_TRUE 1
|
||||
#define OPJ_FALSE 0
|
||||
|
||||
typedef char OPJ_CHAR;
|
||||
typedef float OPJ_FLOAT32;
|
||||
typedef double OPJ_FLOAT64;
|
||||
typedef unsigned char OPJ_BYTE;
|
||||
|
||||
#include "opj_stdint.h"
|
||||
|
||||
typedef int8_t OPJ_INT8;
|
||||
typedef uint8_t OPJ_UINT8;
|
||||
typedef int16_t OPJ_INT16;
|
||||
typedef uint16_t OPJ_UINT16;
|
||||
typedef int32_t OPJ_INT32;
|
||||
typedef uint32_t OPJ_UINT32;
|
||||
typedef int64_t OPJ_INT64;
|
||||
typedef uint64_t OPJ_UINT64;
|
||||
|
||||
typedef int64_t OPJ_OFF_T; /* 64-bit file offset type */
|
||||
|
||||
#include <stdio.h>
|
||||
typedef size_t OPJ_SIZE_T;
|
||||
|
||||
/* Avoid compile-time warning because parameter is not used */
|
||||
#define OPJ_ARG_NOT_USED(x) (void)(x)
|
||||
/*
|
||||
|
||||
/*
|
||||
==========================================================
|
||||
Useful constant definitions
|
||||
==========================================================
|
||||
|
||||
@@ -40,6 +40,8 @@
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <ctype.h>
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
|
||||
/*
|
||||
==========================================================
|
||||
@@ -54,56 +56,115 @@
|
||||
==========================================================
|
||||
*/
|
||||
|
||||
/* Are restricted pointers available? (C99) */
|
||||
#if (__STDC_VERSION__ >= 199901L)
|
||||
#define OPJ_RESTRICT restrict
|
||||
#else
|
||||
/* Not a C99 compiler */
|
||||
#if defined(__GNUC__)
|
||||
#define OPJ_RESTRICT __restrict__
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||
#define OPJ_RESTRICT __restrict
|
||||
#else
|
||||
#define OPJ_RESTRICT /* restrict */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Ignore GCC attributes if this is not GCC */
|
||||
#ifndef __GNUC__
|
||||
#define __attribute__(x) /* __attribute__(x) */
|
||||
#endif
|
||||
|
||||
/*
|
||||
The inline keyword is supported by C99 but not by C90.
|
||||
Most compilers implement their own version of this keyword ...
|
||||
*/
|
||||
#ifndef INLINE
|
||||
#if defined(_MSC_VER)
|
||||
#define INLINE __forceinline
|
||||
#elif defined(__GNUC__)
|
||||
#define INLINE __inline__
|
||||
#elif defined(__MWERKS__)
|
||||
#define INLINE inline
|
||||
#else
|
||||
/* add other compilers here ... */
|
||||
#define INLINE
|
||||
#endif /* defined(<Compiler>) */
|
||||
#endif /* INLINE */
|
||||
|
||||
/* Are restricted pointers available? (C99) */
|
||||
#if (__STDC_VERSION__ != 199901L)
|
||||
/* Not a C99 compiler */
|
||||
#ifdef __GNUC__
|
||||
#define restrict __restrict__
|
||||
#else
|
||||
#define restrict /* restrict */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* MSVC and Borland C do not have lrintf */
|
||||
#if defined(_MSC_VER) || defined(__BORLANDC__)
|
||||
static INLINE long lrintf(float f){
|
||||
/* MSVC before 2013 and Borland C do not have lrintf */
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
static INLINE long opj_lrintf(float f)
|
||||
{
|
||||
#ifdef _M_X64
|
||||
return (long)((f>0.0f) ? (f + 0.5f):(f -0.5f));
|
||||
#else
|
||||
return _mm_cvt_ss2si(_mm_load_ss(&f));
|
||||
|
||||
/* commented out line breaks many tests */
|
||||
/* return (long)((f>0.0f) ? (f + 0.5f):(f -0.5f)); */
|
||||
#elif defined(_M_IX86)
|
||||
int i;
|
||||
|
||||
_asm{
|
||||
fld f
|
||||
fistp i
|
||||
};
|
||||
|
||||
|
||||
return i;
|
||||
#else
|
||||
return (long)((f>0.0f) ? (f + 0.5f) : (f - 0.5f));
|
||||
#endif
|
||||
}
|
||||
#elif defined(__BORLANDC__)
|
||||
static INLINE long opj_lrintf(float f)
|
||||
{
|
||||
#ifdef _M_X64
|
||||
return (long)((f > 0.0f) ? (f + 0.5f) : (f - 0.5f));
|
||||
#else
|
||||
int i;
|
||||
|
||||
_asm {
|
||||
fld f
|
||||
fistp i
|
||||
};
|
||||
|
||||
return i;
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
static INLINE long opj_lrintf(float f)
|
||||
{
|
||||
return lrintf(f);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1400)
|
||||
#define vsnprintf _vsnprintf
|
||||
#endif
|
||||
|
||||
/* MSVC x86 is really bad at doing int64 = int32 * int32 on its own. Use intrinsic. */
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86)
|
||||
# include <intrin.h>
|
||||
# pragma intrinsic(__emul)
|
||||
#endif
|
||||
|
||||
/* Apparently Visual Studio doesn't define __SSE__ / __SSE2__ macros */
|
||||
#if defined(_M_X64)
|
||||
/* Intel 64bit support SSE and SSE2 */
|
||||
# ifndef __SSE__
|
||||
# define __SSE__ 1
|
||||
# endif
|
||||
# ifndef __SSE2__
|
||||
# define __SSE2__ 1
|
||||
# endif
|
||||
# if !defined(__SSE4_1__) && defined(__AVX__)
|
||||
# define __SSE4_1__ 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* For x86, test the value of the _M_IX86_FP macro. */
|
||||
/* See https://msdn.microsoft.com/en-us/library/b0084kay.aspx */
|
||||
#if defined(_M_IX86_FP)
|
||||
# if _M_IX86_FP >= 1
|
||||
# ifndef __SSE__
|
||||
# define __SSE__ 1
|
||||
# endif
|
||||
# endif
|
||||
# if _M_IX86_FP >= 2
|
||||
# ifndef __SSE2__
|
||||
# define __SSE2__ 1
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Type to use for bit-fields in internal headers */
|
||||
typedef unsigned int OPJ_BITFIELD;
|
||||
|
||||
#define OPJ_UNUSED(x) (void)x
|
||||
|
||||
#include "j2k_lib.h"
|
||||
#include "opj_malloc.h"
|
||||
#include "event.h"
|
||||
|
||||
249
indra/libopenjpeg/opj_malloc.c
Normal file
249
indra/libopenjpeg/opj_malloc.c
Normal file
@@ -0,0 +1,249 @@
|
||||
/*
|
||||
* The copyright in this software is being made available under the 2-clauses
|
||||
* BSD License, included below. This software may be subject to other third
|
||||
* party and contributor rights, including patent rights, and no such rights
|
||||
* are granted under this license.
|
||||
*
|
||||
* Copyright (c) 2015, Mathieu Malaterre <mathieu.malaterre@gmail.com>
|
||||
* Copyright (c) 2015, Matthieu Darbois
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#define OPJ_SKIP_POISON
|
||||
#include "opj_includes.h"
|
||||
|
||||
#if defined(OPJ_HAVE_MALLOC_H) && defined(OPJ_HAVE_MEMALIGN)
|
||||
# include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifndef SIZE_MAX
|
||||
# define SIZE_MAX ((size_t) -1)
|
||||
#endif
|
||||
|
||||
static INLINE void *opj_aligned_alloc_n(size_t alignment, size_t size)
|
||||
{
|
||||
void* ptr;
|
||||
|
||||
/* alignment shall be power of 2 */
|
||||
assert((alignment != 0U) && ((alignment & (alignment - 1U)) == 0U));
|
||||
/* alignment shall be at least sizeof(void*) */
|
||||
assert(alignment >= sizeof(void*));
|
||||
|
||||
if (size == 0U) { /* prevent implementation defined behavior of realloc */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if defined(OPJ_HAVE_POSIX_MEMALIGN)
|
||||
/* aligned_alloc requires c11, restrict to posix_memalign for now. Quote:
|
||||
* This function was introduced in POSIX 1003.1d. Although this function is
|
||||
* superseded by aligned_alloc, it is more portable to older POSIX systems
|
||||
* that do not support ISO C11. */
|
||||
if (posix_memalign(&ptr, alignment, size)) {
|
||||
ptr = NULL;
|
||||
}
|
||||
/* older linux */
|
||||
#elif defined(OPJ_HAVE_MEMALIGN)
|
||||
ptr = memalign(alignment, size);
|
||||
/* _MSC_VER */
|
||||
#elif defined(OPJ_HAVE__ALIGNED_MALLOC)
|
||||
ptr = _aligned_malloc(size, alignment);
|
||||
#else
|
||||
/*
|
||||
* Generic aligned malloc implementation.
|
||||
* Uses size_t offset for the integer manipulation of the pointer,
|
||||
* as uintptr_t is not available in C89 to do
|
||||
* bitwise operations on the pointer itself.
|
||||
*/
|
||||
alignment--;
|
||||
{
|
||||
size_t offset;
|
||||
OPJ_UINT8 *mem;
|
||||
|
||||
/* Room for padding and extra pointer stored in front of allocated area */
|
||||
size_t overhead = alignment + sizeof(void *);
|
||||
|
||||
/* let's be extra careful */
|
||||
assert(alignment <= (SIZE_MAX - sizeof(void *)));
|
||||
|
||||
/* Avoid integer overflow */
|
||||
if (size > (SIZE_MAX - overhead)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mem = (OPJ_UINT8*)malloc(size + overhead);
|
||||
if (mem == NULL) {
|
||||
return mem;
|
||||
}
|
||||
/* offset = ((alignment + 1U) - ((size_t)(mem + sizeof(void*)) & alignment)) & alignment; */
|
||||
/* Use the fact that alignment + 1U is a power of 2 */
|
||||
offset = ((alignment ^ ((size_t)(mem + sizeof(void*)) & alignment)) + 1U) &
|
||||
alignment;
|
||||
ptr = (void *)(mem + sizeof(void*) + offset);
|
||||
((void**) ptr)[-1] = mem;
|
||||
}
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
static INLINE void *opj_aligned_realloc_n(void *ptr, size_t alignment,
|
||||
size_t new_size)
|
||||
{
|
||||
void *r_ptr;
|
||||
|
||||
/* alignment shall be power of 2 */
|
||||
assert((alignment != 0U) && ((alignment & (alignment - 1U)) == 0U));
|
||||
/* alignment shall be at least sizeof(void*) */
|
||||
assert(alignment >= sizeof(void*));
|
||||
|
||||
if (new_size == 0U) { /* prevent implementation defined behavior of realloc */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* no portable aligned realloc */
|
||||
#if defined(OPJ_HAVE_POSIX_MEMALIGN) || defined(OPJ_HAVE_MEMALIGN)
|
||||
/* glibc doc states one can mix aligned malloc with realloc */
|
||||
r_ptr = realloc(ptr, new_size); /* fast path */
|
||||
/* we simply use `size_t` to cast, since we are only interest in binary AND
|
||||
* operator */
|
||||
if (((size_t)r_ptr & (alignment - 1U)) != 0U) {
|
||||
/* this is non-trivial to implement a portable aligned realloc, so use a
|
||||
* simple approach where we do not need a function that return the size of an
|
||||
* allocated array (eg. _msize on Windows, malloc_size on MacOS,
|
||||
* malloc_usable_size on systems with glibc) */
|
||||
void *a_ptr = opj_aligned_alloc_n(alignment, new_size);
|
||||
if (a_ptr != NULL) {
|
||||
memcpy(a_ptr, r_ptr, new_size);
|
||||
}
|
||||
free(r_ptr);
|
||||
r_ptr = a_ptr;
|
||||
}
|
||||
/* _MSC_VER */
|
||||
#elif defined(OPJ_HAVE__ALIGNED_MALLOC)
|
||||
r_ptr = _aligned_realloc(ptr, new_size, alignment);
|
||||
#else
|
||||
if (ptr == NULL) {
|
||||
return opj_aligned_alloc_n(alignment, new_size);
|
||||
}
|
||||
alignment--;
|
||||
{
|
||||
void *oldmem;
|
||||
OPJ_UINT8 *newmem;
|
||||
size_t overhead = alignment + sizeof(void *);
|
||||
|
||||
/* let's be extra careful */
|
||||
assert(alignment <= (SIZE_MAX - sizeof(void *)));
|
||||
|
||||
/* Avoid integer overflow */
|
||||
if (new_size > SIZE_MAX - overhead) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
oldmem = ((void**) ptr)[-1];
|
||||
newmem = (OPJ_UINT8*)realloc(oldmem, new_size + overhead);
|
||||
if (newmem == NULL) {
|
||||
return newmem;
|
||||
}
|
||||
|
||||
if (newmem == oldmem) {
|
||||
r_ptr = ptr;
|
||||
} else {
|
||||
size_t old_offset;
|
||||
size_t new_offset;
|
||||
|
||||
/* realloc created a new copy, realign the copied memory block */
|
||||
old_offset = (size_t)((OPJ_UINT8*)ptr - (OPJ_UINT8*)oldmem);
|
||||
|
||||
/* offset = ((alignment + 1U) - ((size_t)(mem + sizeof(void*)) & alignment)) & alignment; */
|
||||
/* Use the fact that alignment + 1U is a power of 2 */
|
||||
new_offset = ((alignment ^ ((size_t)(newmem + sizeof(void*)) & alignment)) +
|
||||
1U) & alignment;
|
||||
new_offset += sizeof(void*);
|
||||
r_ptr = (void *)(newmem + new_offset);
|
||||
|
||||
if (new_offset != old_offset) {
|
||||
memmove(newmem + new_offset, newmem + old_offset, new_size);
|
||||
}
|
||||
((void**) r_ptr)[-1] = newmem;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return r_ptr;
|
||||
}
|
||||
/**
 * Allocate an uninitialized memory block.
 *
 * @param size number of bytes to allocate
 * @return the allocated block, or NULL when size is 0 or malloc() fails
 */
void * opj_malloc(size_t size)
{
    /* malloc(0) is implementation defined, so a zero size always yields NULL */
    return (size != 0U) ? malloc(size) : NULL;
}
|
||||
/**
 * Allocate a zero-initialized array.
 *
 * @param num  number of elements
 * @param size size of one element in bytes
 * @return the allocated block, or NULL when either argument is 0 or
 *         calloc() fails
 */
void * opj_calloc(size_t num, size_t size)
{
    /* a zero-sized calloc is implementation defined: only forward real requests */
    if (num != 0 && size != 0) {
        return calloc(num, size);
    }
    return NULL;
}
|
||||
|
||||
/**
 * Allocate a block through opj_aligned_alloc_n() with a 16 byte alignment.
 *
 * @param size number of bytes to allocate
 * @return whatever opj_aligned_alloc_n() returns for that request
 */
void *opj_aligned_malloc(size_t size)
{
    const size_t alignment = 16U;

    return opj_aligned_alloc_n(alignment, size);
}
|
||||
/**
 * Resize a block through opj_aligned_realloc_n() with a 16 byte alignment.
 *
 * @param ptr  block to resize
 * @param size new size in bytes
 * @return whatever opj_aligned_realloc_n() returns for that request
 */
void * opj_aligned_realloc(void *ptr, size_t size)
{
    const size_t alignment = 16U;

    return opj_aligned_realloc_n(ptr, alignment, size);
}
|
||||
|
||||
/**
 * Allocate a block through opj_aligned_alloc_n() with a 32 byte alignment.
 *
 * @param size number of bytes to allocate
 * @return whatever opj_aligned_alloc_n() returns for that request
 */
void *opj_aligned_32_malloc(size_t size)
{
    const size_t alignment = 32U;

    return opj_aligned_alloc_n(alignment, size);
}
|
||||
/**
 * Resize a block through opj_aligned_realloc_n() with a 32 byte alignment.
 *
 * @param ptr  block to resize
 * @param size new size in bytes
 * @return whatever opj_aligned_realloc_n() returns for that request
 */
void * opj_aligned_32_realloc(void *ptr, size_t size)
{
    const size_t alignment = 32U;

    return opj_aligned_realloc_n(ptr, alignment, size);
}
|
||||
|
||||
/**
 * Release a block obtained from the aligned allocation functions.
 *
 * @param ptr block to release; NULL is accepted
 */
void opj_aligned_free(void* ptr)
{
#if defined(OPJ_HAVE_POSIX_MEMALIGN) || defined(OPJ_HAVE_MEMALIGN)
    free(ptr);
#elif defined(OPJ_HAVE__ALIGNED_MALLOC)
    _aligned_free(ptr);
#else
    /* Generic implementation: the pointer that came from malloc is stored in
     * the slot just in front of the user area, and that is what gets freed. */
    void **slot;

    if (ptr == NULL) {
        return;
    }
    slot = (void **) ptr;
    free(slot[-1]);
#endif
}
|
||||
|
||||
/**
 * Resize a memory block.
 *
 * @param ptr      block to resize
 * @param new_size new size in bytes
 * @return the resized (possibly moved) block, or NULL when new_size is 0 or
 *         realloc() fails
 */
void * opj_realloc(void *ptr, size_t new_size)
{
    /* realloc(ptr, 0) is implementation defined: return NULL without
     * touching ptr instead of forwarding the call */
    return (new_size != 0U) ? realloc(ptr, new_size) : NULL;
}
|
||||
/**
 * Release a block by handing it to free().
 *
 * @param ptr block to release
 */
void opj_free(void *ptr)
{
    free(ptr);
}
|
||||
@@ -1,4 +1,9 @@
|
||||
/*
|
||||
* The copyright in this software is being made available under the 2-clauses
|
||||
* BSD License, included below. This software may be subject to other third
|
||||
* party and contributor rights, including patent rights, and no such rights
|
||||
* are granted under this license.
|
||||
*
|
||||
* Copyright (c) 2005, Herve Drolon, FreeImage Team
|
||||
* Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
|
||||
* All rights reserved.
|
||||
@@ -24,8 +29,10 @@
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __OPJ_MALLOC_H
|
||||
#define __OPJ_MALLOC_H
|
||||
#ifndef OPJ_MALLOC_H
|
||||
#define OPJ_MALLOC_H
|
||||
|
||||
#include <stddef.h>
|
||||
/**
|
||||
@file opj_malloc.h
|
||||
@brief Internal functions
|
||||
@@ -36,6 +43,17 @@ The functions in opj_malloc.h are internal utilities used for memory management.
|
||||
/** @defgroup MISC MISC - Miscellaneous internal functions */
|
||||
/*@{*/
|
||||
|
||||
/* FIXME: These should be set with cmake tests, but we're currently not requiring use of cmake */
|
||||
#ifdef _WIN32
|
||||
#define OPJ_HAVE__ALIGNED_MALLOC
|
||||
#else /* Not _WIN32 */
|
||||
#if defined(__sun)
|
||||
#define OPJ_HAVE_MEMALIGN
|
||||
#elif defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__)
|
||||
#define OPJ_HAVE_POSIX_MEMALIGN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/** @name Exported functions */
|
||||
/*@{*/
|
||||
/* ----------------------------------------------------------------------- */
|
||||
@@ -45,90 +63,32 @@ Allocate an uninitialized memory block
|
||||
@param size Bytes to allocate
|
||||
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
|
||||
*/
|
||||
#ifdef ALLOC_PERF_OPT
|
||||
void * OPJ_CALLCONV opj_malloc(size_t size);
|
||||
#else
|
||||
#define opj_malloc(size) malloc(size)
|
||||
#endif
|
||||
void * opj_malloc(size_t size);
|
||||
|
||||
/**
|
||||
Allocate a memory block with elements initialized to 0
|
||||
@param num Blocks to allocate
|
||||
@param size Bytes per block to allocate
|
||||
@param numOfElements Blocks to allocate
|
||||
@param sizeOfElements Bytes per block to allocate
|
||||
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
|
||||
*/
|
||||
#ifdef ALLOC_PERF_OPT
|
||||
void * OPJ_CALLCONV opj_calloc(size_t _NumOfElements, size_t _SizeOfElements);
|
||||
#else
|
||||
#define opj_calloc(num, size) calloc(num, size)
|
||||
#endif
|
||||
void * opj_calloc(size_t numOfElements, size_t sizeOfElements);
|
||||
|
||||
/**
|
||||
Allocate memory aligned to a 16 byte boundry
|
||||
Allocate memory aligned to a 16 byte boundary
|
||||
@param size Bytes to allocate
|
||||
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
|
||||
*/
|
||||
/* FIXME: These should be set with cmake tests, but we're currently not requiring use of cmake */
|
||||
#ifdef _WIN32
|
||||
/* Someone should tell the mingw people that their malloc.h ought to provide _mm_malloc() */
|
||||
#ifdef __GNUC__
|
||||
#include <mm_malloc.h>
|
||||
#define HAVE_MM_MALLOC
|
||||
#else /* MSVC, Intel C++ */
|
||||
#include <malloc.h>
|
||||
#ifdef _mm_malloc
|
||||
#define HAVE_MM_MALLOC
|
||||
#endif
|
||||
#endif
|
||||
#else /* Not _WIN32 */
|
||||
#if defined(__sun)
|
||||
#define HAVE_MEMALIGN
|
||||
#elif defined(__FreeBSD__)
|
||||
#define HAVE_POSIX_MEMALIGN
|
||||
/* Linux x86_64 and OSX always align allocations to 16 bytes */
|
||||
#elif !defined(__amd64__) && !defined(__APPLE__) && !defined(_AIX)
|
||||
#define HAVE_MEMALIGN
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
#endif
|
||||
void * opj_aligned_malloc(size_t size);
|
||||
void * opj_aligned_realloc(void *ptr, size_t size);
|
||||
void opj_aligned_free(void* ptr);
|
||||
|
||||
#define opj_aligned_malloc(size) malloc(size)
|
||||
#define opj_aligned_free(m) free(m)
|
||||
|
||||
#ifdef HAVE_MM_MALLOC
|
||||
#undef opj_aligned_malloc
|
||||
#define opj_aligned_malloc(size) _mm_malloc(size, 16)
|
||||
#undef opj_aligned_free
|
||||
#define opj_aligned_free(m) _mm_free(m)
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MEMALIGN
|
||||
extern void* memalign(size_t, size_t);
|
||||
#undef opj_aligned_malloc
|
||||
#define opj_aligned_malloc(size) memalign(16, (size))
|
||||
#undef opj_aligned_free
|
||||
#define opj_aligned_free(m) free(m)
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_POSIX_MEMALIGN
|
||||
#undef opj_aligned_malloc
|
||||
extern int posix_memalign(void**, size_t, size_t);
|
||||
|
||||
static INLINE void* __attribute__ ((malloc)) opj_aligned_malloc(size_t size){
|
||||
void* mem = NULL;
|
||||
posix_memalign(&mem, 16, size);
|
||||
return mem;
|
||||
}
|
||||
#undef opj_aligned_free
|
||||
#define opj_aligned_free(m) free(m)
|
||||
#endif
|
||||
|
||||
#ifdef ALLOC_PERF_OPT
|
||||
#undef opj_aligned_malloc
|
||||
#define opj_aligned_malloc(size) opj_malloc(size)
|
||||
#undef opj_aligned_free
|
||||
#define opj_aligned_free(m) opj_free(m)
|
||||
#endif
|
||||
/**
|
||||
Allocate memory aligned to a 32 byte boundary
|
||||
@param size Bytes to allocate
|
||||
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
|
||||
*/
|
||||
void * opj_aligned_32_malloc(size_t size);
|
||||
void * opj_aligned_32_realloc(void *ptr, size_t size);
|
||||
|
||||
/**
|
||||
Reallocate memory blocks.
|
||||
@@ -136,23 +96,15 @@ Reallocate memory blocks.
|
||||
@param s New size in bytes
|
||||
@return Returns a void pointer to the reallocated (and possibly moved) memory block
|
||||
*/
|
||||
#ifdef ALLOC_PERF_OPT
|
||||
void * OPJ_CALLCONV opj_realloc(void * m, size_t s);
|
||||
#else
|
||||
#define opj_realloc(m, s) realloc(m, s)
|
||||
#endif
|
||||
void * opj_realloc(void * m, size_t s);
|
||||
|
||||
/**
|
||||
Deallocates or frees a memory block.
|
||||
@param m Previously allocated memory block to be freed
|
||||
*/
|
||||
#ifdef ALLOC_PERF_OPT
|
||||
void OPJ_CALLCONV opj_free(void * m);
|
||||
#else
|
||||
#define opj_free(m) free(m)
|
||||
#endif
|
||||
void opj_free(void * m);
|
||||
|
||||
#ifdef __GNUC__
|
||||
#if defined(__GNUC__) && !defined(OPJ_SKIP_POISON)
|
||||
#pragma GCC poison malloc calloc realloc free
|
||||
#endif
|
||||
|
||||
@@ -161,5 +113,5 @@ void OPJ_CALLCONV opj_free(void * m);
|
||||
|
||||
/*@}*/
|
||||
|
||||
#endif /* __OPJ_MALLOC_H */
|
||||
#endif /* OPJ_MALLOC_H */
|
||||
|
||||
|
||||
51
indra/libopenjpeg/opj_stdint.h
Normal file
51
indra/libopenjpeg/opj_stdint.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* The copyright in this software is being made available under the 2-clauses
|
||||
* BSD License, included below. This software may be subject to other third
|
||||
* party and contributor rights, including patent rights, and no such rights
|
||||
* are granted under this license.
|
||||
*
|
||||
* Copyright (c) 2012, Mathieu Malaterre <mathieu.malaterre@gmail.com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef OPJ_STDINT_H
#define OPJ_STDINT_H

/*
 * Provides the fixed-width integer types int8_t .. uint64_t.
 *
 * <stdint.h> is used on the platforms known to ship it, with Visual Studio
 * 2015 or later (_MSC_VER >= 1900), and with any compiler that announces C99
 * or later through __STDC_VERSION__. Other Windows toolchains get equivalent
 * typedefs built on the __intN extension types. Anything else is rejected at
 * compile time.
 */
#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || \
    (defined(_MSC_VER) && _MSC_VER >= 1900) || \
    (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
#include <stdint.h>
#elif defined(_WIN32)
typedef signed   __int8  int8_t;
typedef unsigned __int8  uint8_t;
typedef signed   __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef signed   __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef signed   __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#error unsupported platform
#endif

#endif /* OPJ_STDINT_H */
|
||||
@@ -1427,7 +1427,7 @@ void t1_encode_cblks(
|
||||
opj_tcd_resolution_t *res = &tilec->resolutions[resno];
|
||||
|
||||
for (bandno = 0; bandno < res->numbands; ++bandno) {
|
||||
opj_tcd_band_t* restrict band = &res->bands[bandno];
|
||||
opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
|
||||
int bandconst = 8192 * 8192 / ((int) floor(band->stepsize * 8192));
|
||||
|
||||
for (precno = 0; precno < res->pw * res->ph; ++precno) {
|
||||
@@ -1435,8 +1435,8 @@ void t1_encode_cblks(
|
||||
|
||||
for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
|
||||
opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
|
||||
int* restrict datap;
|
||||
int* restrict tiledp;
|
||||
int* OPJ_RESTRICT datap;
|
||||
int* OPJ_RESTRICT tiledp;
|
||||
int cblk_w;
|
||||
int cblk_h;
|
||||
int i, j;
|
||||
@@ -1517,14 +1517,14 @@ void t1_decode_cblks(
|
||||
opj_tcd_resolution_t* res = &tilec->resolutions[resno];
|
||||
|
||||
for (bandno = 0; bandno < res->numbands; ++bandno) {
|
||||
opj_tcd_band_t* restrict band = &res->bands[bandno];
|
||||
opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
|
||||
|
||||
for (precno = 0; precno < res->pw * res->ph; ++precno) {
|
||||
opj_tcd_precinct_t* precinct = &band->precincts[precno];
|
||||
|
||||
for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
|
||||
opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
|
||||
int* restrict datap;
|
||||
int* OPJ_RESTRICT datap;
|
||||
int cblk_w, cblk_h;
|
||||
int x, y;
|
||||
int i, j;
|
||||
@@ -1566,7 +1566,7 @@ void t1_decode_cblks(
|
||||
}
|
||||
|
||||
if (tccp->qmfbid == 1) {
|
||||
int* restrict tiledp = &tilec->data[(y * tile_w) + x];
|
||||
int* OPJ_RESTRICT tiledp = &tilec->data[(y * tile_w) + x];
|
||||
for (j = 0; j < cblk_h; ++j) {
|
||||
for (i = 0; i < cblk_w; ++i) {
|
||||
int tmp = datap[(j * cblk_w) + i];
|
||||
@@ -1574,9 +1574,9 @@ void t1_decode_cblks(
|
||||
}
|
||||
}
|
||||
} else { /* if (tccp->qmfbid == 0) */
|
||||
float* restrict tiledp = (float*) &tilec->data[(y * tile_w) + x];
|
||||
float* OPJ_RESTRICT tiledp = (float*) &tilec->data[(y * tile_w) + x];
|
||||
for (j = 0; j < cblk_h; ++j) {
|
||||
float* restrict tiledp2 = tiledp;
|
||||
float* OPJ_RESTRICT tiledp2 = tiledp;
|
||||
for (i = 0; i < cblk_w; ++i) {
|
||||
float tmp = *datap * band->stepsize;
|
||||
*tiledp2 = tmp;
|
||||
|
||||
@@ -194,7 +194,7 @@ int main(){
|
||||
|
||||
printf("/* This file was automatically generated by t1_generate_luts.c */\n\n");
|
||||
|
||||
// lut_ctxno_zc
|
||||
/* lut_ctxno_zc */
|
||||
for (j = 0; j < 4; ++j) {
|
||||
for (i = 0; i < 256; ++i) {
|
||||
int orient = j;
|
||||
@@ -215,7 +215,7 @@ int main(){
|
||||
}
|
||||
printf("%i\n};\n\n", lut_ctxno_zc[1023]);
|
||||
|
||||
// lut_ctxno_sc
|
||||
/* lut_ctxno_sc */
|
||||
printf("static char lut_ctxno_sc[256] = {\n ");
|
||||
for (i = 0; i < 255; ++i) {
|
||||
printf("0x%x, ", t1_init_ctxno_sc(i << 4));
|
||||
@@ -224,7 +224,7 @@ int main(){
|
||||
}
|
||||
printf("0x%x\n};\n\n", t1_init_ctxno_sc(255 << 4));
|
||||
|
||||
// lut_spb
|
||||
/* lut_spb */
|
||||
printf("static char lut_spb[256] = {\n ");
|
||||
for (i = 0; i < 255; ++i) {
|
||||
printf("%i, ", t1_init_spb(i << 4));
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
*/
|
||||
|
||||
#include "opj_includes.h"
|
||||
#include <assert.h>
|
||||
|
||||
/** @defgroup T2 T2 - Implementation of a tier-2 coding */
|
||||
/*@{*/
|
||||
@@ -340,13 +341,15 @@ static int t2_decode_packet(opj_t2_t* t2, unsigned char *src, int len, opj_tcd_t
|
||||
int precno = pi->precno; /* precinct value */
|
||||
int layno = pi->layno; /* quality layer value */
|
||||
|
||||
opj_tcd_resolution_t* res = &tile->comps[compno].resolutions[resno];
|
||||
|
||||
unsigned char *hd = NULL;
|
||||
int present;
|
||||
|
||||
opj_bio_t *bio = NULL; /* BIO component */
|
||||
|
||||
|
||||
opj_tcd_resolution_t* res;
|
||||
assert(&tile->comps[compno] != NULL);
|
||||
res = &tile->comps[compno].resolutions[resno];
|
||||
|
||||
if (layno == 0) {
|
||||
for (bandno = 0; bandno < res->numbands; bandno++) {
|
||||
opj_tcd_band_t *band = &res->bands[bandno];
|
||||
|
||||
@@ -1507,7 +1507,7 @@ opj_bool tcd_decode_tile(opj_tcd_t *tcd, unsigned char *src, int len, int tileno
|
||||
for(j = res->y0; j < res->y1; ++j) {
|
||||
for(i = res->x0; i < res->x1; ++i) {
|
||||
float tmp = ((float*)tilec->data)[i - res->x0 + (j - res->y0) * tw];
|
||||
int v = lrintf(tmp);
|
||||
int v = opj_lrintf(tmp);
|
||||
v += adjust;
|
||||
imagec->data[(i - offset_x) + (j - offset_y) * w] = int_clamp(v, min, max);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user