/*
 * The copyright in this software is being made available under the 2-clauses
 * BSD License, included below. This software may be subject to other third
 * party and contributor rights, including patent rights, and no such rights
 * are granted under this license.
 *
 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
 * Copyright (c) 2002-2014, Professor Benoit Macq
 * Copyright (c) 2001-2003, David Janssens
 * Copyright (c) 2002-2003, Yannick Verschueren
 * Copyright (c) 2003-2007, Francois-Olivier Devaux
 * Copyright (c) 2003-2014, Antonin Descampe
 * Copyright (c) 2005, Herve Drolon, FreeImage Team
 * Copyright (c) 2008, 2011-2012, Centre National d'Etudes Spatiales (CNES), FR
 * Copyright (c) 2012, CS Systemes d'Information, France
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#if defined(__SSE__) && !defined(_M_IX86) && !defined(__i386)
#define USE_SSE
#include <xmmintrin.h>
#endif
#if defined(__SSE2__) && !defined(_M_IX86) && !defined(__i386)
#define USE_SSE2
#include <emmintrin.h>
#endif
#if defined(__SSE4_1__) && !defined(_M_IX86) && !defined(__i386)
#define USE_SSE4
#include <smmintrin.h>
#endif

#include "opj_includes.h"

/* <summary> */
/* This table contains the norms of the basis functions of the reversible MCT. */
/* </summary> */
static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 };

/* <summary> */
/* This table contains the norms of the basis functions of the irreversible MCT. */
/* </summary> */
static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 };
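
/*
 * Where these values come from (a derivation sketch, computed from the
 * inverse transforms in this file rather than taken from an external
 * source): each norm is the Euclidean length of one column of the inverse
 * transform, i.e. the RGB-space gain of a unit change in that component.
 * Reversible inverse (see opj_mct_decode):
 *   R = Y - U/4 + 3V/4,  G = Y - U/4 - V/4,  B = Y + 3U/4 - V/4
 *   Y column (1, 1, 1)             -> sqrt(3)    ~= 1.732
 *   U column (-1/4, -1/4, 3/4)     -> sqrt(11)/4 ~= 0.8292
 *   V column (3/4, -1/4, -1/4)     -> sqrt(11)/4 ~= 0.8292
 * Irreversible inverse (see opj_mct_decode_real):
 *   Y  column (1, 1, 1)            -> 1.732
 *   Cb column (0, -0.34413, 1.772) -> ~1.805
 *   Cr column (1.402, -0.71414, 0) -> ~1.573
 */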

const OPJ_FLOAT64 * opj_mct_get_mct_norms()
{
    return opj_mct_norms;
}

const OPJ_FLOAT64 * opj_mct_get_mct_norms_real()
{
    return opj_mct_norms_real;
}

/* <summary> */
/* Forward reversible MCT. */
/* </summary> */
#ifdef USE_SSE2
void opj_mct_encode(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;
    /* buffers are aligned on 16 bytes */
    assert(((size_t)c0 & 0xf) == 0);
    assert(((size_t)c1 & 0xf) == 0);
    assert(((size_t)c2 & 0xf) == 0);

    for (i = 0; i < (len & ~3U); i += 4) {
        __m128i y, u, v;
        __m128i r = _mm_load_si128((const __m128i *) & (c0[i]));
        __m128i g = _mm_load_si128((const __m128i *) & (c1[i]));
        __m128i b = _mm_load_si128((const __m128i *) & (c2[i]));
        /* y = (r + 2*g + b) >> 2; u = b - g; v = r - g */
        y = _mm_add_epi32(g, g);
        y = _mm_add_epi32(y, b);
        y = _mm_add_epi32(y, r);
        y = _mm_srai_epi32(y, 2);
        u = _mm_sub_epi32(b, g);
        v = _mm_sub_epi32(r, g);
        _mm_store_si128((__m128i *) & (c0[i]), y);
        _mm_store_si128((__m128i *) & (c1[i]), u);
        _mm_store_si128((__m128i *) & (c2[i]), v);
    }

    /* scalar tail for the last (len % 4) samples */
    for (; i < len; ++i) {
        OPJ_INT32 r = c0[i];
        OPJ_INT32 g = c1[i];
        OPJ_INT32 b = c2[i];
        OPJ_INT32 y = (r + (g * 2) + b) >> 2;
        OPJ_INT32 u = b - g;
        OPJ_INT32 v = r - g;
        c0[i] = y;
        c1[i] = u;
        c2[i] = v;
    }
}
#else
void opj_mct_encode(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;

    for (i = 0; i < len; ++i) {
        OPJ_INT32 r = c0[i];
        OPJ_INT32 g = c1[i];
        OPJ_INT32 b = c2[i];
        OPJ_INT32 y = (r + (g * 2) + b) >> 2;
        OPJ_INT32 u = b - g;
        OPJ_INT32 v = r - g;
        c0[i] = y;
        c1[i] = u;
        c2[i] = v;
    }
}
#endif
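
/*
 * Worked example (illustrative values only): r = 255, g = 128, b = 0 gives
 *   y = (255 + 2*128 + 0) >> 2 = 511 >> 2 = 127
 *   u = 0 - 128 = -128
 *   v = 255 - 128 = 127
 * and the inverse transform below recovers the input exactly, including
 * the arithmetic right shift on a negative sum:
 *   g = 127 - ((-128 + 127) >> 2) = 127 - (-1) = 128
 *   r = 127 + 128 = 255
 *   b = -128 + 128 = 0
 * (-1 >> 2 == -1 assumes an arithmetic shift of negative values, which is
 * implementation-defined in C but relied upon throughout this code.)
 */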

/* <summary> */
/* Inverse reversible MCT. */
/* </summary> */
#ifdef USE_SSE2
void opj_mct_decode(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;

    for (i = 0; i < (len & ~3U); i += 4) {
        __m128i r, g, b;
        __m128i y = _mm_load_si128((const __m128i *) & (c0[i]));
        __m128i u = _mm_load_si128((const __m128i *) & (c1[i]));
        __m128i v = _mm_load_si128((const __m128i *) & (c2[i]));
        /* g = y - ((u + v) >> 2); r = v + g; b = u + g */
        g = y;
        g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2));
        r = _mm_add_epi32(v, g);
        b = _mm_add_epi32(u, g);
        _mm_store_si128((__m128i *) & (c0[i]), r);
        _mm_store_si128((__m128i *) & (c1[i]), g);
        _mm_store_si128((__m128i *) & (c2[i]), b);
    }
    /* scalar tail for the last (len % 4) samples */
    for (; i < len; ++i) {
        OPJ_INT32 y = c0[i];
        OPJ_INT32 u = c1[i];
        OPJ_INT32 v = c2[i];
        OPJ_INT32 g = y - ((u + v) >> 2);
        OPJ_INT32 r = v + g;
        OPJ_INT32 b = u + g;
        c0[i] = r;
        c1[i] = g;
        c2[i] = b;
    }
}
#else
void opj_mct_decode(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    for (i = 0; i < n; ++i) {
        OPJ_INT32 y = c0[i];
        OPJ_INT32 u = c1[i];
        OPJ_INT32 v = c2[i];
        OPJ_INT32 g = y - ((u + v) >> 2);
        OPJ_INT32 r = v + g;
        OPJ_INT32 b = u + g;
        c0[i] = r;
        c1[i] = g;
        c2[i] = b;
    }
}
#endif
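
/*
 * Minimal usage sketch (not part of the library; sample values are left
 * out). The SSE2 paths use _mm_load_si128, so the three buffers must be
 * 16-byte aligned, e.g. via opj_aligned_malloc() from opj_malloc.h:
 *
 *   OPJ_SIZE_T n = 64;
 *   OPJ_INT32 *c0 = (OPJ_INT32 *) opj_aligned_malloc(n * sizeof(OPJ_INT32));
 *   OPJ_INT32 *c1 = (OPJ_INT32 *) opj_aligned_malloc(n * sizeof(OPJ_INT32));
 *   OPJ_INT32 *c2 = (OPJ_INT32 *) opj_aligned_malloc(n * sizeof(OPJ_INT32));
 *   if (c0 && c1 && c2) {
 *       ... fill c0/c1/c2 with R/G/B samples ...
 *       opj_mct_encode(c0, c1, c2, n);   <- in place: buffers now hold Y/U/V
 *       opj_mct_decode(c0, c1, c2, n);   <- lossless round trip back to R/G/B
 *   }
 *   opj_aligned_free(c0);
 *   opj_aligned_free(c1);
 *   opj_aligned_free(c2);
 */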

/* <summary> */
/* Get norm of basis function of reversible MCT. */
/* </summary> */
OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno)
{
    return opj_mct_norms[compno];
}

/* <summary> */
/* Forward irreversible MCT. */
/* </summary> */
#ifdef USE_SSE4
void opj_mct_encode_real(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;

    /* 13-bit fixed-point weights of the forward ICT matrix */
    const __m128i ry = _mm_set1_epi32(2449);
    const __m128i gy = _mm_set1_epi32(4809);
    const __m128i by = _mm_set1_epi32(934);
    const __m128i ru = _mm_set1_epi32(1382);
    const __m128i gu = _mm_set1_epi32(2714);
    /* const __m128i bu = _mm_set1_epi32(4096); */
    /* const __m128i rv = _mm_set1_epi32(4096); */
    const __m128i gv = _mm_set1_epi32(3430);
    const __m128i bv = _mm_set1_epi32(666);
    const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096),
                             _MM_SHUFFLE(1, 0, 1, 0));

    for (i = 0; i < (len & ~3U); i += 4) {
        __m128i lo, hi;
        __m128i y, u, v;
        __m128i r = _mm_load_si128((const __m128i *) & (c0[i]));
        __m128i g = _mm_load_si128((const __m128i *) & (c1[i]));
        __m128i b = _mm_load_si128((const __m128i *) & (c2[i]));

        /* For each weighted term: form 64-bit products of the even and odd
           32-bit lanes, add the rounding bias, shift back by 13, then
           re-interleave the two halves with a blend. */
        lo = r;
        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, ry);
        hi = _mm_mul_epi32(hi, ry);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        y = _mm_blend_epi16(lo, hi, 0xCC);

        lo = g;
        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, gy);
        hi = _mm_mul_epi32(hi, gy);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));

        lo = b;
        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, by);
        hi = _mm_mul_epi32(hi, by);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
        _mm_store_si128((__m128i *) & (c0[i]), y);

        /* The bu weight is exactly 4096 (0.5 in 13-bit fixed point), so a
           left shift by 12 replaces the multiply. */
        /*lo = b;
        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, mulround);
        hi = _mm_mul_epi32(hi, mulround);*/
        lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
        hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
        lo = _mm_slli_epi64(lo, 12);
        hi = _mm_slli_epi64(hi, 12);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        u = _mm_blend_epi16(lo, hi, 0xCC);

        lo = r;
        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, ru);
        hi = _mm_mul_epi32(hi, ru);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));

        lo = g;
        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, gu);
        hi = _mm_mul_epi32(hi, gu);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
        _mm_store_si128((__m128i *) & (c1[i]), u);

        /* Likewise, rv is exactly 4096 (0.5). */
        /*lo = r;
        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, mulround);
        hi = _mm_mul_epi32(hi, mulround);*/
        lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
        hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
        lo = _mm_slli_epi64(lo, 12);
        hi = _mm_slli_epi64(hi, 12);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        v = _mm_blend_epi16(lo, hi, 0xCC);

        lo = g;
        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, gv);
        hi = _mm_mul_epi32(hi, gv);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));

        lo = b;
        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, bv);
        hi = _mm_mul_epi32(hi, bv);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
        _mm_store_si128((__m128i *) & (c2[i]), v);
    }
    /* scalar tail for the last (len % 4) samples */
    for (; i < len; ++i) {
        OPJ_INT32 r = c0[i];
        OPJ_INT32 g = c1[i];
        OPJ_INT32 b = c2[i];
        OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
                      4809) + opj_int_fix_mul(b, 934);
        OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
                      2714) + opj_int_fix_mul(b, 4096);
        OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
                      3430) - opj_int_fix_mul(b, 666);
        c0[i] = y;
        c1[i] = u;
        c2[i] = v;
    }
}
#else
void opj_mct_encode_real(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    for (i = 0; i < n; ++i) {
        OPJ_INT32 r = c0[i];
        OPJ_INT32 g = c1[i];
        OPJ_INT32 b = c2[i];
        OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
                      4809) + opj_int_fix_mul(b, 934);
        OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
                      2714) + opj_int_fix_mul(b, 4096);
        OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
                      3430) - opj_int_fix_mul(b, 666);
        c0[i] = y;
        c1[i] = u;
        c2[i] = v;
    }
}
#endif
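
/*
 * A note on the constants above (a sketch of the arithmetic, assuming the
 * opj_int_fix_mul() definition in opj_intmath.h, which computes
 * (a * b + 4096) >> 13, i.e. a rounded multiply by b / 8192):
 *   2449 / 8192 ~= 0.2989,  4809 / 8192 ~= 0.5870,  934 / 8192 ~= 0.1140
 * so the Y row is the usual luma weighting Y ~= 0.299 R + 0.587 G + 0.114 B
 * in 13-bit fixed point. Likewise 1382/8192 ~= 0.1687, 2714/8192 ~= 0.3313
 * and 4096/8192 = 0.5 for the Cb row, and 4096/8192 = 0.5,
 * 3430/8192 ~= 0.4187, 666/8192 ~= 0.0813 for the Cr row. The mulround
 * constant in the SSE4 path plays the role of the +4096 rounding term.
 */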

/* <summary> */
/* Inverse irreversible MCT. */
/* </summary> */
void opj_mct_decode_real(
    OPJ_FLOAT32* OPJ_RESTRICT c0,
    OPJ_FLOAT32* OPJ_RESTRICT c1,
    OPJ_FLOAT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
#ifdef USE_SSE
    __m128 vrv, vgu, vgv, vbu;
    vrv = _mm_set1_ps(1.402f);
    vgu = _mm_set1_ps(0.34413f);
    vgv = _mm_set1_ps(0.71414f);
    vbu = _mm_set1_ps(1.772f);
    /* two unrolled 4-float iterations: 8 samples per pass */
    for (i = 0; i < (n >> 3); ++i) {
        __m128 vy, vu, vv;
        __m128 vr, vg, vb;

        vy = _mm_load_ps(c0);
        vu = _mm_load_ps(c1);
        vv = _mm_load_ps(c2);
        vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
        vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
        vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
        _mm_store_ps(c0, vr);
        _mm_store_ps(c1, vg);
        _mm_store_ps(c2, vb);
        c0 += 4;
        c1 += 4;
        c2 += 4;

        vy = _mm_load_ps(c0);
        vu = _mm_load_ps(c1);
        vv = _mm_load_ps(c2);
        vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
        vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
        vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
        _mm_store_ps(c0, vr);
        _mm_store_ps(c1, vg);
        _mm_store_ps(c2, vb);
        c0 += 4;
        c1 += 4;
        c2 += 4;
    }
    n &= 7;
#endif
    /* scalar tail (or the whole buffer when SSE is unavailable) */
    for (i = 0; i < n; ++i) {
        OPJ_FLOAT32 y = c0[i];
        OPJ_FLOAT32 u = c1[i];
        OPJ_FLOAT32 v = c2[i];
        OPJ_FLOAT32 r = y + (v * 1.402f);
        OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * 0.71414f);
        OPJ_FLOAT32 b = y + (u * 1.772f);
        c0[i] = r;
        c1[i] = g;
        c2[i] = b;
    }
}
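
/*
 * The constants 1.402, 0.34413, 0.71414 and 1.772 are the standard inverse
 * ICT (YCbCr -> RGB) weights. As a quick check against the norms table at
 * the top of the file: y = 0, u = 0, v = 1 gives r = 1.402, g = -0.71414,
 * b = 0, i.e. exactly the Cr column whose length is
 * opj_mct_norms_real[2] ~= 1.573. Note that the USE_SSE path above uses
 * _mm_load_ps/_mm_store_ps, so the float buffers are assumed to be 16-byte
 * aligned, mirroring the integer paths.
 */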

/* <summary> */
/* Get norm of basis function of irreversible MCT. */
/* </summary> */
OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno)
{
    return opj_mct_norms_real[compno];
}

OPJ_BOOL opj_mct_encode_custom(
    OPJ_BYTE * pCodingdata,
    OPJ_SIZE_T n,
    OPJ_BYTE ** pData,
    OPJ_UINT32 pNbComp,
    OPJ_UINT32 isSigned)
{
    OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;
    OPJ_SIZE_T i;
    OPJ_UINT32 j;
    OPJ_UINT32 k;
    OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;
    OPJ_INT32 * lCurrentData = NULL;
    OPJ_INT32 * lCurrentMatrix = NULL;
    OPJ_INT32 ** lData = (OPJ_INT32 **) pData;
    OPJ_UINT32 lMultiplicator = 1 << 13;
    OPJ_INT32 * lMctPtr;

    OPJ_ARG_NOT_USED(isSigned);

    lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof(
                       OPJ_INT32));
    if (! lCurrentData) {
        return OPJ_FALSE;
    }

    lCurrentMatrix = lCurrentData + pNbComp;

    /* convert the float matrix to 13-bit fixed point */
    for (i = 0; i < lNbMatCoeff; ++i) {
        lCurrentMatrix[i] = (OPJ_INT32)(*(lMct++) * (OPJ_FLOAT32)lMultiplicator);
    }

    for (i = 0; i < n; ++i) {
        lMctPtr = lCurrentMatrix;
        /* snapshot the current sample of each component */
        for (j = 0; j < pNbComp; ++j) {
            lCurrentData[j] = (*(lData[j]));
        }

        /* fixed-point matrix-vector product, written back in place */
        for (j = 0; j < pNbComp; ++j) {
            *(lData[j]) = 0;
            for (k = 0; k < pNbComp; ++k) {
                *(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]);
                ++lMctPtr;
            }

            ++lData[j];
        }
    }

    opj_free(lCurrentData);

    return OPJ_TRUE;
}
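
/*
 * Usage sketch (names and sizes are hypothetical): pCodingdata points to a
 * row-major pNbComp x pNbComp OPJ_FLOAT32 matrix, pData holds pNbComp
 * pointers to component buffers of n samples each, and the transform is
 * applied in place after converting the matrix to 13-bit fixed point.
 * With the identity matrix each fixed-point product is
 * (x * 8192 + 4096) >> 13 == x, so every sample is reproduced exactly:
 *
 *   OPJ_FLOAT32 identity[9] = { 1, 0, 0,  0, 1, 0,  0, 0, 1 };
 *   OPJ_INT32 *comps[3] = { c0, c1, c2 };
 *   opj_mct_encode_custom((OPJ_BYTE *) identity, n,
 *                         (OPJ_BYTE **) comps, 3, 0);
 */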

OPJ_BOOL opj_mct_decode_custom(
    OPJ_BYTE * pDecodingData,
    OPJ_SIZE_T n,
    OPJ_BYTE ** pData,
    OPJ_UINT32 pNbComp,
    OPJ_UINT32 isSigned)
{
    OPJ_FLOAT32 * lMct;
    OPJ_SIZE_T i;
    OPJ_UINT32 j;
    OPJ_UINT32 k;

    OPJ_FLOAT32 * lCurrentData = NULL;
    OPJ_FLOAT32 * lCurrentResult = NULL;
    OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData;

    OPJ_ARG_NOT_USED(isSigned);

    lCurrentData = (OPJ_FLOAT32 *) opj_malloc(2 * pNbComp * sizeof(OPJ_FLOAT32));
    if (! lCurrentData) {
        return OPJ_FALSE;
    }
    lCurrentResult = lCurrentData + pNbComp;

    for (i = 0; i < n; ++i) {
        lMct = (OPJ_FLOAT32 *) pDecodingData;
        /* snapshot the current sample of each component */
        for (j = 0; j < pNbComp; ++j) {
            lCurrentData[j] = (OPJ_FLOAT32)(*(lData[j]));
        }
        /* floating-point matrix-vector product, written back in place */
        for (j = 0; j < pNbComp; ++j) {
            lCurrentResult[j] = 0;
            for (k = 0; k < pNbComp; ++k) {
                lCurrentResult[j] += *(lMct++) * lCurrentData[k];
            }
            *(lData[j]++) = (OPJ_FLOAT32)(lCurrentResult[j]);
        }
    }
    opj_free(lCurrentData);
    return OPJ_TRUE;
}
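
/*
 * Decoding counterpart of the sketch above (names hypothetical): the
 * component buffers are OPJ_FLOAT32 here and the matrix is applied in
 * plain floating point, with no fixed-point conversion, so pDecodingData
 * should hold the inverse of the matrix given to opj_mct_encode_custom
 * when a round trip is intended:
 *
 *   OPJ_FLOAT32 *comps[3] = { f0, f1, f2 };
 *   opj_mct_decode_custom((OPJ_BYTE *) inverse_matrix, n,
 *                         (OPJ_BYTE **) comps, 3, 0);
 */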

void opj_calculate_norms(OPJ_FLOAT64 * pNorms,
                         OPJ_UINT32 pNbComps,
                         OPJ_FLOAT32 * pMatrix)
{
    OPJ_UINT32 i, j, lIndex;
    OPJ_FLOAT32 lCurrentValue;
    OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms;
    OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix;

    for (i = 0; i < pNbComps; ++i) {
        lNorms[i] = 0;
        lIndex = i;

        /* walk down column i of the row-major matrix */
        for (j = 0; j < pNbComps; ++j) {
            lCurrentValue = lMatrix[lIndex];
            lIndex += pNbComps;
            lNorms[i] += lCurrentValue * lCurrentValue;
        }
        lNorms[i] = sqrt(lNorms[i]);
    }
}
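
/*
 * Illustrative example (buffer names hypothetical): because the loop above
 * steps lIndex by pNbComps through the row-major matrix, each pNorms[i] is
 * the Euclidean norm of column i. For the 3 x 3 identity every norm is 1.0;
 * applied to the inverse irreversible MCT matrix, the three results match
 * opj_mct_norms_real at the top of this file.
 *
 *   OPJ_FLOAT32 m[9] = { 1, 0, 0,  0, 1, 0,  0, 0, 1 };
 *   OPJ_FLOAT64 norms[3];
 *   opj_calculate_norms(norms, 3, m);   <- norms[0..2] are all 1.0
 */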