1 /*
2 * The copyright in this software is being made available under the 2-clauses
3 * BSD License, included below. This software may be subject to other third
4 * party and contributor rights, including patent rights, and no such rights
5 * are granted under this license.
6 *
7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8 * Copyright (c) 2002-2014, Professor Benoit Macq
9 * Copyright (c) 2001-2003, David Janssens
10 * Copyright (c) 2002-2003, Yannick Verschueren
11 * Copyright (c) 2003-2007, Francois-Olivier Devaux
12 * Copyright (c) 2003-2014, Antonin Descampe
13 * Copyright (c) 2005, Herve Drolon, FreeImage Team
14 * Copyright (c) 2008, 2011-2012, Centre National d'Etudes Spatiales (CNES), FR
15 * Copyright (c) 2012, CS Systemes d'Information, France
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 #if defined(__SSE__) && !defined(_M_IX86) && !defined(__i386)
41 #define USE_SSE
42 #include <xmmintrin.h>
43 #endif
44 #if defined(__SSE2__) && !defined(_M_IX86) && !defined(__i386)
45 #define USE_SSE2
46 #include <emmintrin.h>
47 #endif
48 #if defined(__SSE4_1__) && !defined(_M_IX86) && !defined(__i386)
49 #define USE_SSE4
50 #include <smmintrin.h>
51 #endif
52
53 #include "opj_includes.h"
54
55 /* <summary> */
56 /* This table contains the norms of the basis function of the reversible MCT. */
57 /* </summary> */
58 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 };
59
60 /* <summary> */
61 /* This table contains the norms of the basis function of the irreversible MCT. */
62 /* </summary> */
63 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 };
64
opj_mct_get_mct_norms()65 const OPJ_FLOAT64 * opj_mct_get_mct_norms ()
66 {
67 return opj_mct_norms;
68 }
69
opj_mct_get_mct_norms_real()70 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real ()
71 {
72 return opj_mct_norms_real;
73 }
74
75 /* <summary> */
76 /* Forward reversible MCT. */
77 /* </summary> */
78 #ifdef USE_SSE2
opj_mct_encode(OPJ_INT32 * restrict c0,OPJ_INT32 * restrict c1,OPJ_INT32 * restrict c2,OPJ_UINT32 n)79 void opj_mct_encode(
80 OPJ_INT32* restrict c0,
81 OPJ_INT32* restrict c1,
82 OPJ_INT32* restrict c2,
83 OPJ_UINT32 n)
84 {
85 OPJ_SIZE_T i;
86 const OPJ_SIZE_T len = n;
87
88 for(i = 0; i < (len & ~3U); i += 4) {
89 __m128i y, u, v;
90 __m128i r = _mm_load_si128((const __m128i *)&(c0[i]));
91 __m128i g = _mm_load_si128((const __m128i *)&(c1[i]));
92 __m128i b = _mm_load_si128((const __m128i *)&(c2[i]));
93 y = _mm_add_epi32(g, g);
94 y = _mm_add_epi32(y, b);
95 y = _mm_add_epi32(y, r);
96 y = _mm_srai_epi32(y, 2);
97 u = _mm_sub_epi32(b, g);
98 v = _mm_sub_epi32(r, g);
99 _mm_store_si128((__m128i *)&(c0[i]), y);
100 _mm_store_si128((__m128i *)&(c1[i]), u);
101 _mm_store_si128((__m128i *)&(c2[i]), v);
102 }
103
104 for(; i < len; ++i) {
105 OPJ_INT32 r = c0[i];
106 OPJ_INT32 g = c1[i];
107 OPJ_INT32 b = c2[i];
108 OPJ_INT32 y = (r + (g * 2) + b) >> 2;
109 OPJ_INT32 u = b - g;
110 OPJ_INT32 v = r - g;
111 c0[i] = y;
112 c1[i] = u;
113 c2[i] = v;
114 }
115 }
116 #else
opj_mct_encode(OPJ_INT32 * restrict c0,OPJ_INT32 * restrict c1,OPJ_INT32 * restrict c2,OPJ_UINT32 n)117 void opj_mct_encode(
118 OPJ_INT32* restrict c0,
119 OPJ_INT32* restrict c1,
120 OPJ_INT32* restrict c2,
121 OPJ_UINT32 n)
122 {
123 OPJ_SIZE_T i;
124 const OPJ_SIZE_T len = n;
125
126 for(i = 0; i < len; ++i) {
127 OPJ_INT32 r = c0[i];
128 OPJ_INT32 g = c1[i];
129 OPJ_INT32 b = c2[i];
130 OPJ_INT32 y = (r + (g * 2) + b) >> 2;
131 OPJ_INT32 u = b - g;
132 OPJ_INT32 v = r - g;
133 c0[i] = y;
134 c1[i] = u;
135 c2[i] = v;
136 }
137 }
138 #endif
139
140 /* <summary> */
141 /* Inverse reversible MCT. */
142 /* </summary> */
143 #ifdef USE_SSE2
opj_mct_decode(OPJ_INT32 * restrict c0,OPJ_INT32 * restrict c1,OPJ_INT32 * restrict c2,OPJ_UINT32 n)144 void opj_mct_decode(
145 OPJ_INT32* restrict c0,
146 OPJ_INT32* restrict c1,
147 OPJ_INT32* restrict c2,
148 OPJ_UINT32 n)
149 {
150 OPJ_SIZE_T i;
151 const OPJ_SIZE_T len = n;
152
153 for(i = 0; i < (len & ~3U); i += 4) {
154 __m128i r, g, b;
155 __m128i y = _mm_load_si128((const __m128i *)&(c0[i]));
156 __m128i u = _mm_load_si128((const __m128i *)&(c1[i]));
157 __m128i v = _mm_load_si128((const __m128i *)&(c2[i]));
158 g = y;
159 g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2));
160 r = _mm_add_epi32(v, g);
161 b = _mm_add_epi32(u, g);
162 _mm_store_si128((__m128i *)&(c0[i]), r);
163 _mm_store_si128((__m128i *)&(c1[i]), g);
164 _mm_store_si128((__m128i *)&(c2[i]), b);
165 }
166 for (; i < len; ++i) {
167 OPJ_INT32 y = c0[i];
168 OPJ_INT32 u = c1[i];
169 OPJ_INT32 v = c2[i];
170 OPJ_INT32 g = y - ((u + v) >> 2);
171 OPJ_INT32 r = v + g;
172 OPJ_INT32 b = u + g;
173 c0[i] = r;
174 c1[i] = g;
175 c2[i] = b;
176 }
177 }
178 #else
opj_mct_decode(OPJ_INT32 * restrict c0,OPJ_INT32 * restrict c1,OPJ_INT32 * restrict c2,OPJ_UINT32 n)179 void opj_mct_decode(
180 OPJ_INT32* restrict c0,
181 OPJ_INT32* restrict c1,
182 OPJ_INT32* restrict c2,
183 OPJ_UINT32 n)
184 {
185 OPJ_UINT32 i;
186 for (i = 0; i < n; ++i) {
187 OPJ_INT32 y = c0[i];
188 OPJ_INT32 u = c1[i];
189 OPJ_INT32 v = c2[i];
190 OPJ_INT32 g = y - ((u + v) >> 2);
191 OPJ_INT32 r = v + g;
192 OPJ_INT32 b = u + g;
193 c0[i] = r;
194 c1[i] = g;
195 c2[i] = b;
196 }
197 }
198 #endif
199
200 /* <summary> */
201 /* Get norm of basis function of reversible MCT. */
202 /* </summary> */
opj_mct_getnorm(OPJ_UINT32 compno)203 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) {
204 return opj_mct_norms[compno];
205 }
206
207 /* <summary> */
208 /* Forward irreversible MCT. */
209 /* </summary> */
210 #ifdef USE_SSE4
opj_mct_encode_real(OPJ_INT32 * restrict c0,OPJ_INT32 * restrict c1,OPJ_INT32 * restrict c2,OPJ_UINT32 n)211 void opj_mct_encode_real(
212 OPJ_INT32* restrict c0,
213 OPJ_INT32* restrict c1,
214 OPJ_INT32* restrict c2,
215 OPJ_UINT32 n)
216 {
217 OPJ_SIZE_T i;
218 const OPJ_SIZE_T len = n;
219
220 const __m128i ry = _mm_set1_epi32(2449);
221 const __m128i gy = _mm_set1_epi32(4809);
222 const __m128i by = _mm_set1_epi32(934);
223 const __m128i ru = _mm_set1_epi32(1382);
224 const __m128i gu = _mm_set1_epi32(2714);
225 /* const __m128i bu = _mm_set1_epi32(4096); */
226 /* const __m128i rv = _mm_set1_epi32(4096); */
227 const __m128i gv = _mm_set1_epi32(3430);
228 const __m128i bv = _mm_set1_epi32(666);
229 const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096), _MM_SHUFFLE(1, 0, 1, 0));
230
231 for(i = 0; i < (len & ~3U); i += 4) {
232 __m128i lo, hi;
233 __m128i y, u, v;
234 __m128i r = _mm_load_si128((const __m128i *)&(c0[i]));
235 __m128i g = _mm_load_si128((const __m128i *)&(c1[i]));
236 __m128i b = _mm_load_si128((const __m128i *)&(c2[i]));
237
238 lo = r;
239 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
240 lo = _mm_mul_epi32(lo, ry);
241 hi = _mm_mul_epi32(hi, ry);
242 lo = _mm_add_epi64(lo, mulround);
243 hi = _mm_add_epi64(hi, mulround);
244 lo = _mm_srli_epi64(lo, 13);
245 hi = _mm_slli_epi64(hi, 32-13);
246 y = _mm_blend_epi16(lo, hi, 0xCC);
247
248 lo = g;
249 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
250 lo = _mm_mul_epi32(lo, gy);
251 hi = _mm_mul_epi32(hi, gy);
252 lo = _mm_add_epi64(lo, mulround);
253 hi = _mm_add_epi64(hi, mulround);
254 lo = _mm_srli_epi64(lo, 13);
255 hi = _mm_slli_epi64(hi, 32-13);
256 y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
257
258 lo = b;
259 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
260 lo = _mm_mul_epi32(lo, by);
261 hi = _mm_mul_epi32(hi, by);
262 lo = _mm_add_epi64(lo, mulround);
263 hi = _mm_add_epi64(hi, mulround);
264 lo = _mm_srli_epi64(lo, 13);
265 hi = _mm_slli_epi64(hi, 32-13);
266 y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
267 _mm_store_si128((__m128i *)&(c0[i]), y);
268
269 /*lo = b;
270 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
271 lo = _mm_mul_epi32(lo, mulround);
272 hi = _mm_mul_epi32(hi, mulround);*/
273 lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
274 hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
275 lo = _mm_slli_epi64(lo, 12);
276 hi = _mm_slli_epi64(hi, 12);
277 lo = _mm_add_epi64(lo, mulround);
278 hi = _mm_add_epi64(hi, mulround);
279 lo = _mm_srli_epi64(lo, 13);
280 hi = _mm_slli_epi64(hi, 32-13);
281 u = _mm_blend_epi16(lo, hi, 0xCC);
282
283 lo = r;
284 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
285 lo = _mm_mul_epi32(lo, ru);
286 hi = _mm_mul_epi32(hi, ru);
287 lo = _mm_add_epi64(lo, mulround);
288 hi = _mm_add_epi64(hi, mulround);
289 lo = _mm_srli_epi64(lo, 13);
290 hi = _mm_slli_epi64(hi, 32-13);
291 u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
292
293 lo = g;
294 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
295 lo = _mm_mul_epi32(lo, gu);
296 hi = _mm_mul_epi32(hi, gu);
297 lo = _mm_add_epi64(lo, mulround);
298 hi = _mm_add_epi64(hi, mulround);
299 lo = _mm_srli_epi64(lo, 13);
300 hi = _mm_slli_epi64(hi, 32-13);
301 u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
302 _mm_store_si128((__m128i *)&(c1[i]), u);
303
304 /*lo = r;
305 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
306 lo = _mm_mul_epi32(lo, mulround);
307 hi = _mm_mul_epi32(hi, mulround);*/
308 lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
309 hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
310 lo = _mm_slli_epi64(lo, 12);
311 hi = _mm_slli_epi64(hi, 12);
312 lo = _mm_add_epi64(lo, mulround);
313 hi = _mm_add_epi64(hi, mulround);
314 lo = _mm_srli_epi64(lo, 13);
315 hi = _mm_slli_epi64(hi, 32-13);
316 v = _mm_blend_epi16(lo, hi, 0xCC);
317
318 lo = g;
319 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
320 lo = _mm_mul_epi32(lo, gv);
321 hi = _mm_mul_epi32(hi, gv);
322 lo = _mm_add_epi64(lo, mulround);
323 hi = _mm_add_epi64(hi, mulround);
324 lo = _mm_srli_epi64(lo, 13);
325 hi = _mm_slli_epi64(hi, 32-13);
326 v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
327
328 lo = b;
329 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
330 lo = _mm_mul_epi32(lo, bv);
331 hi = _mm_mul_epi32(hi, bv);
332 lo = _mm_add_epi64(lo, mulround);
333 hi = _mm_add_epi64(hi, mulround);
334 lo = _mm_srli_epi64(lo, 13);
335 hi = _mm_slli_epi64(hi, 32-13);
336 v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
337 _mm_store_si128((__m128i *)&(c2[i]), v);
338 }
339 for(; i < len; ++i) {
340 OPJ_INT32 r = c0[i];
341 OPJ_INT32 g = c1[i];
342 OPJ_INT32 b = c2[i];
343 OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934);
344 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096);
345 OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666);
346 c0[i] = y;
347 c1[i] = u;
348 c2[i] = v;
349 }
350 }
351 #else
opj_mct_encode_real(OPJ_INT32 * restrict c0,OPJ_INT32 * restrict c1,OPJ_INT32 * restrict c2,OPJ_UINT32 n)352 void opj_mct_encode_real(
353 OPJ_INT32* restrict c0,
354 OPJ_INT32* restrict c1,
355 OPJ_INT32* restrict c2,
356 OPJ_UINT32 n)
357 {
358 OPJ_UINT32 i;
359 for(i = 0; i < n; ++i) {
360 OPJ_INT32 r = c0[i];
361 OPJ_INT32 g = c1[i];
362 OPJ_INT32 b = c2[i];
363 OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934);
364 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096);
365 OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666);
366 c0[i] = y;
367 c1[i] = u;
368 c2[i] = v;
369 }
370 }
371 #endif
372
373 /* <summary> */
374 /* Inverse irreversible MCT. */
375 /* </summary> */
opj_mct_decode_real(OPJ_FLOAT32 * restrict c0,OPJ_FLOAT32 * restrict c1,OPJ_FLOAT32 * restrict c2,OPJ_UINT32 n)376 void opj_mct_decode_real(
377 OPJ_FLOAT32* restrict c0,
378 OPJ_FLOAT32* restrict c1,
379 OPJ_FLOAT32* restrict c2,
380 OPJ_UINT32 n)
381 {
382 OPJ_UINT32 i;
383 #ifdef USE_SSE
384 __m128 vrv, vgu, vgv, vbu;
385 vrv = _mm_set1_ps(1.402f);
386 vgu = _mm_set1_ps(0.34413f);
387 vgv = _mm_set1_ps(0.71414f);
388 vbu = _mm_set1_ps(1.772f);
389 for (i = 0; i < (n >> 3); ++i) {
390 __m128 vy, vu, vv;
391 __m128 vr, vg, vb;
392
393 vy = _mm_load_ps(c0);
394 vu = _mm_load_ps(c1);
395 vv = _mm_load_ps(c2);
396 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
397 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
398 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
399 _mm_store_ps(c0, vr);
400 _mm_store_ps(c1, vg);
401 _mm_store_ps(c2, vb);
402 c0 += 4;
403 c1 += 4;
404 c2 += 4;
405
406 vy = _mm_load_ps(c0);
407 vu = _mm_load_ps(c1);
408 vv = _mm_load_ps(c2);
409 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
410 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
411 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
412 _mm_store_ps(c0, vr);
413 _mm_store_ps(c1, vg);
414 _mm_store_ps(c2, vb);
415 c0 += 4;
416 c1 += 4;
417 c2 += 4;
418 }
419 n &= 7;
420 #endif
421 for(i = 0; i < n; ++i) {
422 OPJ_FLOAT32 y = c0[i];
423 OPJ_FLOAT32 u = c1[i];
424 OPJ_FLOAT32 v = c2[i];
425 OPJ_FLOAT32 r = y + (v * 1.402f);
426 OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f));
427 OPJ_FLOAT32 b = y + (u * 1.772f);
428 c0[i] = r;
429 c1[i] = g;
430 c2[i] = b;
431 }
432 }
433
434 /* <summary> */
435 /* Get norm of basis function of irreversible MCT. */
436 /* </summary> */
opj_mct_getnorm_real(OPJ_UINT32 compno)437 OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno) {
438 return opj_mct_norms_real[compno];
439 }
440
441
opj_mct_encode_custom(OPJ_BYTE * pCodingdata,OPJ_UINT32 n,OPJ_BYTE ** pData,OPJ_UINT32 pNbComp,OPJ_UINT32 isSigned)442 OPJ_BOOL opj_mct_encode_custom(
443 OPJ_BYTE * pCodingdata,
444 OPJ_UINT32 n,
445 OPJ_BYTE ** pData,
446 OPJ_UINT32 pNbComp,
447 OPJ_UINT32 isSigned)
448 {
449 OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;
450 OPJ_UINT32 i;
451 OPJ_UINT32 j;
452 OPJ_UINT32 k;
453 OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;
454 OPJ_INT32 * lCurrentData = 00;
455 OPJ_INT32 * lCurrentMatrix = 00;
456 OPJ_INT32 ** lData = (OPJ_INT32 **) pData;
457 OPJ_UINT32 lMultiplicator = 1 << 13;
458 OPJ_INT32 * lMctPtr;
459
460 OPJ_ARG_NOT_USED(isSigned);
461
462 lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof(OPJ_INT32));
463 if (! lCurrentData) {
464 return OPJ_FALSE;
465 }
466
467 lCurrentMatrix = lCurrentData + pNbComp;
468
469 for (i =0;i<lNbMatCoeff;++i) {
470 lCurrentMatrix[i] = (OPJ_INT32) (*(lMct++) * (OPJ_FLOAT32)lMultiplicator);
471 }
472
473 for (i = 0; i < n; ++i) {
474 lMctPtr = lCurrentMatrix;
475 for (j=0;j<pNbComp;++j) {
476 lCurrentData[j] = (*(lData[j]));
477 }
478
479 for (j=0;j<pNbComp;++j) {
480 *(lData[j]) = 0;
481 for (k=0;k<pNbComp;++k) {
482 *(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]);
483 ++lMctPtr;
484 }
485
486 ++lData[j];
487 }
488 }
489
490 opj_free(lCurrentData);
491
492 return OPJ_TRUE;
493 }
494
opj_mct_decode_custom(OPJ_BYTE * pDecodingData,OPJ_UINT32 n,OPJ_BYTE ** pData,OPJ_UINT32 pNbComp,OPJ_UINT32 isSigned)495 OPJ_BOOL opj_mct_decode_custom(
496 OPJ_BYTE * pDecodingData,
497 OPJ_UINT32 n,
498 OPJ_BYTE ** pData,
499 OPJ_UINT32 pNbComp,
500 OPJ_UINT32 isSigned)
501 {
502 OPJ_FLOAT32 * lMct;
503 OPJ_UINT32 i;
504 OPJ_UINT32 j;
505 OPJ_UINT32 k;
506
507 OPJ_FLOAT32 * lCurrentData = 00;
508 OPJ_FLOAT32 * lCurrentResult = 00;
509 OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData;
510
511 OPJ_ARG_NOT_USED(isSigned);
512
513 lCurrentData = (OPJ_FLOAT32 *) opj_malloc (2 * pNbComp * sizeof(OPJ_FLOAT32));
514 if (! lCurrentData) {
515 return OPJ_FALSE;
516 }
517 lCurrentResult = lCurrentData + pNbComp;
518
519 for (i = 0; i < n; ++i) {
520 lMct = (OPJ_FLOAT32 *) pDecodingData;
521 for (j=0;j<pNbComp;++j) {
522 lCurrentData[j] = (OPJ_FLOAT32) (*(lData[j]));
523 }
524 for (j=0;j<pNbComp;++j) {
525 lCurrentResult[j] = 0;
526 for (k=0;k<pNbComp;++k) {
527 lCurrentResult[j] += *(lMct++) * lCurrentData[k];
528 }
529 *(lData[j]++) = (OPJ_FLOAT32) (lCurrentResult[j]);
530 }
531 }
532 opj_free(lCurrentData);
533 return OPJ_TRUE;
534 }
535
/**
 * Computes, for each component, the Euclidean norm of the corresponding
 * column of the pNbComps x pNbComps transform matrix pMatrix (row-major),
 * storing the results in pNorms.
 */
void opj_calculate_norms(OPJ_FLOAT64 * pNorms,
                         OPJ_UINT32 pNbComps,
                         OPJ_FLOAT32 * pMatrix)
{
    OPJ_UINT32 lCol;
    OPJ_UINT32 lRow;
    OPJ_UINT32 lOffset;

    for (lCol = 0; lCol < pNbComps; ++lCol) {
        OPJ_FLOAT64 lSumSq = 0;
        /* Walk column lCol: successive rows are pNbComps elements apart. */
        lOffset = lCol;
        for (lRow = 0; lRow < pNbComps; ++lRow) {
            OPJ_FLOAT32 lVal = pMatrix[lOffset];
            lOffset += pNbComps;
            lSumSq += lVal * lVal;
        }
        pNorms[lCol] = sqrt(lSumSq);
    }
}
557