1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "cpu.h"
20 #include "qsort.h"
21 #include "bprint.h"
22
23 #include "tx_priv.h"
24
/* Evaluates to nonzero when x equals the float, double or int32 variant of
 * the transform kind named by type, e.g. TYPE_IS(MDCT, s->type).
 * Note that x is expanded (and therefore evaluated) up to three times. */
#define TYPE_IS(type, x)               \
    (((x) == AV_TX_FLOAT_  ## type) || \
     ((x) == AV_TX_DOUBLE_ ## type) || \
     ((x) == AV_TX_INT32_  ## type))
29
30 /* Calculates the modular multiplicative inverse */
mulinv(int n,int m)31 static av_always_inline int mulinv(int n, int m)
32 {
33 n = n % m;
34 for (int x = 1; x < m; x++)
35 if (((n * x) % m) == 1)
36 return x;
37 av_assert0(0); /* Never reached */
38 return 0;
39 }
40
41 /* Guaranteed to work for any n, m where gcd(n, m) == 1 */
ff_tx_gen_compound_mapping(AVTXContext * s,int n,int m)42 int ff_tx_gen_compound_mapping(AVTXContext *s, int n, int m)
43 {
44 int *in_map, *out_map;
45 const int inv = s->inv;
46 const int len = n*m; /* Will not be equal to s->len for MDCTs */
47 const int mdct = TYPE_IS(MDCT, s->type);
48 int m_inv, n_inv;
49
50 /* Make sure the numbers are coprime */
51 if (av_gcd(n, m) != 1)
52 return AVERROR(EINVAL);
53
54 m_inv = mulinv(m, n);
55 n_inv = mulinv(n, m);
56
57 if (!(s->map = av_malloc(2*len*sizeof(*s->map))))
58 return AVERROR(ENOMEM);
59
60 in_map = s->map;
61 out_map = s->map + len;
62
63 /* Ruritanian map for input, CRT map for output, can be swapped */
64 for (int j = 0; j < m; j++) {
65 for (int i = 0; i < n; i++) {
66 /* Shifted by 1 to simplify MDCTs */
67 in_map[j*n + i] = ((i*m + j*n) % len) << mdct;
68 out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
69 }
70 }
71
72 /* Change transform direction by reversing all ACs */
73 if (inv) {
74 for (int i = 0; i < m; i++) {
75 int *in = &in_map[i*n + 1]; /* Skip the DC */
76 for (int j = 0; j < ((n - 1) >> 1); j++)
77 FFSWAP(int, in[j], in[n - j - 2]);
78 }
79 }
80
81 /* Our 15-point transform is also a compound one, so embed its input map */
82 if (n == 15) {
83 for (int k = 0; k < m; k++) {
84 int tmp[15];
85 memcpy(tmp, &in_map[k*15], 15*sizeof(*tmp));
86 for (int i = 0; i < 5; i++) {
87 for (int j = 0; j < 3; j++)
88 in_map[k*15 + i*3 + j] = tmp[(i*3 + j*5) % 15];
89 }
90 }
91 }
92
93 return 0;
94 }
95
/**
 * Recursively computes the split-radix permutation term for index i of a
 * transform of length len.
 *
 * The result may be negative or exceed len; the callers in this file negate
 * it and mask it with (len - 1) to obtain a table index.
 *
 * @param i   index to permute
 * @param len current (sub)transform length
 * @param inv nonzero for the inverse direction; flips the sign of the
 *            odd-branch offset
 */
static inline int split_radix_permutation(int i, int len, int inv)
{
    const int half = len >> 1;
    int quarter;

    /* Recursion bottoms out on the lowest bit of the index */
    if (half <= 1)
        return i & 1;

    /* Lower half: recurse into the half-length transform */
    if (!(i & half))
        return 2 * split_radix_permutation(i, half, inv);

    /* Upper half: recurse into one of the two quarter-length transforms */
    quarter = half >> 1;
    return 4 * split_radix_permutation(i, quarter, inv) + 1 -
           2 * (!(i & quarter) ^ inv);
}
106
/**
 * Allocates s->map with s->len entries and fills it with the split-radix
 * permutation table.
 *
 * @param s             context; s->len and s->inv are read, s->map written
 * @param invert_lookup if nonzero, map[i] holds the permuted index of i;
 *                      otherwise map[permuted index of i] holds i
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure
 */
int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
{
    const int len  = s->len;
    const int mask = len - 1;

    s->map = av_malloc(len*sizeof(*s->map));
    if (!s->map)
        return AVERROR(ENOMEM);

    for (int i = 0; i < len; i++) {
        const int k = -split_radix_permutation(i, len, s->inv) & mask;

        if (invert_lookup)
            s->map[i] = k;
        else
            s->map[k] = i;
    }

    return 0;
}
124
/**
 * Builds, in s->map, a zero-terminated list holding one entry index for each
 * permutation loop of the first subtransform's map (s->sub->map).
 *
 * @param s context; needs s->sub and s->sub->map to be set. s->map is
 *          allocated (zeroed) with s->len entries.
 * @return 0 on success, AVERROR(EINVAL) if the subtransform map is missing,
 *         AVERROR(ENOMEM) on allocation failure
 */
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
{
    const int len = s->len;
    const int *revtab;
    int nb_entries = 0;

    if (!s->sub || !s->sub->map)
        return AVERROR(EINVAL);

    s->map = av_mallocz(len*sizeof(*s->map));
    if (!s->map)
        return AVERROR(ENOMEM);

    revtab = s->sub->map;

    /* The first coefficient is always already in-place */
    for (int src = 1; src < len; src++) {
        int dst  = revtab[src];
        int seen = 0;

        if (dst <= src)
            continue;

        /* Walk the loop starting at src. If any element on it is already a
         * recorded entry point, the loop has been handled before and must
         * not be entered a second time. */
        do {
            for (int j = 0; j < nb_entries && !seen; j++)
                seen = (dst == s->map[j]);
            dst = revtab[dst];
        } while (dst != src && !seen);

        if (!seen)
            s->map[nb_entries++] = src;
    }

    /* Terminate the list */
    s->map[nb_entries] = 0;

    return 0;
}
166
/**
 * Recursively fills revtab for a split-radix transform of n points, placing
 * even and odd coefficients of each leaf in separate runs.
 *
 * The recursion splits the current range into one half-length and two
 * quarter-length parts until half of the current length is <= basis; the
 * leaf then writes the table entries.
 *
 * @param revtab      table to fill
 * @param n           full transform length, used for the permutation and mask
 * @param inv         transform direction, passed to the permutation
 * @param offset      first index handled by this call
 * @param is_dual     nonzero for the two quarter-length branches
 * @param dual_high   nonzero for the second quarter-length branch
 * @param len         length of the range handled by this call
 * @param basis       leaf size threshold
 * @param dual_stride run length used to interleave the two quarter-length
 *                    branches; 0 disables the interleaving
 * @param inv_lookup  if nonzero, store permuted index at position; otherwise
 *                    store position at permuted index
 */
static void parity_revtab_generator(int *revtab, int n, int inv, int offset,
                                    int is_dual, int dual_high, int len,
                                    int basis, int dual_stride, int inv_lookup)
{
    const int half = len >> 1;
    int stride, even_pos, odd_pos;

    if (half > basis) {
        const int quarter = half >> 1;

        parity_revtab_generator(revtab, n, inv, offset,
                                0, 0, half, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + half,
                                1, 0, quarter, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + half + quarter,
                                1, 1, quarter, basis, dual_stride, inv_lookup);
        return;
    }

    /* Leaf: the dual layout only applies when a stride was requested */
    is_dual   = is_dual && dual_stride;
    dual_high = is_dual & dual_high;
    stride    = is_dual ? FFMIN(dual_stride, half) : 0;

    even_pos = offset + dual_high*(stride - 2*half);
    odd_pos  = even_pos + half + (is_dual && !dual_high)*half + dual_high*half;

    for (int i = 0; i < half; i++) {
        const int k_even = -split_radix_permutation(offset + i*2 + 0, n, inv) & (n - 1);
        const int k_odd  = -split_radix_permutation(offset + i*2 + 1, n, inv) & (n - 1);

        if (inv_lookup) {
            revtab[even_pos++] = k_even;
            revtab[odd_pos++]  = k_odd;
        } else {
            revtab[k_even] = even_pos++;
            revtab[k_odd]  = odd_pos++;
        }

        /* After each full run, skip over the run owned by the other branch */
        if (stride && !((i + 1) % stride)) {
            even_pos += stride;
            odd_pos  += stride;
        }
    }
}
209
/**
 * Allocates s->map with s->len entries and fills it with a split-radix
 * permutation table in which each leaf's even and odd coefficients are
 * stored in separate runs.
 *
 * @param s             context; s->len and s->inv are read, s->map written
 * @param invert_lookup selects the lookup direction of the generated table
 * @param basis         leaf size; halved before use
 * @param dual_stride   0, or a power of two not larger than the halved basis
 * @return 0 on success, AVERROR(EINVAL) if the transform is shorter than the
 *         halved basis, AVERROR(ENOMEM) on allocation failure
 */
int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int invert_lookup,
                                        int basis, int dual_stride)
{
    int len = s->len;
    int inv = s->inv;

    /* Validate before allocating, so a rejected request leaves s->map
     * untouched instead of holding a zeroed table nobody will use. */
    basis >>= 1;
    if (len < basis)
        return AVERROR(EINVAL);

    if (!(s->map = av_mallocz(len*sizeof(*s->map))))
        return AVERROR(ENOMEM);

    av_assert0(!dual_stride || !(dual_stride & (dual_stride - 1)));
    av_assert0(dual_stride <= basis);
    parity_revtab_generator(s->map, len, inv, 0, 0, 0, len,
                            basis, dual_stride, invert_lookup);

    return 0;
}
230
/**
 * Releases everything owned by a context and zeroes it.
 *
 * Subtransforms are reset recursively first, then the codelet's uninit
 * callback (if any) is run, then the sub, map, exp and tmp buffers are
 * freed. The context structure itself is not freed.
 *
 * @param s context to reset; may be NULL
 */
static void reset_ctx(AVTXContext *s)
{
    if (!s)
        return;

    if (s->sub)
        for (int i = 0; i < s->nb_sub; i++)
            reset_ctx(&s->sub[i]);

    /* A zeroed context has no codelet attached yet, so check cd_self too */
    if (s->cd_self && s->cd_self->uninit)
        s->cd_self->uninit(s);

    av_freep(&s->sub);
    av_freep(&s->map);
    av_freep(&s->exp);
    av_freep(&s->tmp);

    memset(s, 0, sizeof(*s));
}
250
av_tx_uninit(AVTXContext ** ctx)251 av_cold void av_tx_uninit(AVTXContext **ctx)
252 {
253 if (!(*ctx))
254 return;
255
256 reset_ctx(*ctx);
257 av_freep(ctx);
258 }
259
ff_tx_null_init(AVTXContext * s,const FFTXCodelet * cd,uint64_t flags,FFTXCodeletOptions * opts,int len,int inv,const void * scale)260 static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd,
261 uint64_t flags, FFTXCodeletOptions *opts,
262 int len, int inv, const void *scale)
263 {
264 /* Can only handle one sample+type to one sample+type transforms */
265 if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type))
266 return AVERROR(EINVAL);
267 return 0;
268 }
269
270 /* Null transform when the length is 1 */
ff_tx_null(AVTXContext * s,void * _out,void * _in,ptrdiff_t stride)271 static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
272 {
273 memcpy(_out, _in, stride);
274 }
275
276 static const FFTXCodelet ff_tx_null_def = {
277 .name = NULL_IF_CONFIG_SMALL("null"),
278 .function = ff_tx_null,
279 .type = TX_TYPE_ANY,
280 .flags = AV_TX_UNALIGNED | FF_TX_ALIGNED |
281 FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
282 .factors[0] = TX_FACTOR_ANY,
283 .min_len = 1,
284 .max_len = 1,
285 .init = ff_tx_null_init,
286 .cpu_flags = FF_TX_CPU_FLAGS_ALL,
287 .prio = FF_TX_PRIO_MAX,
288 };
289
290 static const FFTXCodelet * const ff_tx_null_list[] = {
291 &ff_tx_null_def,
292 NULL,
293 };
294
295 #if !CONFIG_SMALL
/**
 * Appends "flags: [...]" to bp, listing the name of every known flag set in
 * f, separated by ", ".
 *
 * @param bp buffer to append to
 * @param f  flag bitmask to describe
 */
static void print_flags(AVBPrint *bp, uint64_t f)
{
    /* Listed in output order */
    static const struct {
        uint64_t    flag;
        const char *name;
    } flag_names[] = {
        { FF_TX_ALIGNED,      "aligned"      },
        { AV_TX_UNALIGNED,    "unaligned"    },
        { AV_TX_INPLACE,      "inplace"      },
        { FF_TX_OUT_OF_PLACE, "out_of_place" },
        { FF_TX_FORWARD_ONLY, "fwd_only"     },
        { FF_TX_INVERSE_ONLY, "inv_only"     },
        { FF_TX_PRESHUFFLE,   "preshuf"      },
        { AV_TX_FULL_IMDCT,   "imdct_full"   },
    };
    int nb_printed = 0;

    av_bprintf(bp, "flags: [");
    for (int i = 0; i < FF_ARRAY_ELEMS(flag_names); i++) {
        if (!(f & flag_names[i].flag))
            continue;
        /* Every name but the first one printed is preceded by a separator */
        av_bprintf(bp, "%s%s", nb_printed++ ? ", " : "", flag_names[i].name);
    }
    av_bprintf(bp, "]");
}
319
/**
 * Appends a short name for the transform type to bp, or "unknown" if the
 * type is not recognized.
 *
 * @param bp   buffer to append to
 * @param type transform type, or TX_TYPE_ANY
 */
static void print_type(AVBPrint *bp, enum AVTXType type)
{
    const char *name = "unknown";

    if (type == TX_TYPE_ANY) {
        name = "any";
    } else {
        switch (type) {
        case AV_TX_FLOAT_FFT:   name = "fft_float";   break;
        case AV_TX_FLOAT_MDCT:  name = "mdct_float";  break;
        case AV_TX_FLOAT_RDFT:  name = "rdft_float";  break;
        case AV_TX_DOUBLE_FFT:  name = "fft_double";  break;
        case AV_TX_DOUBLE_MDCT: name = "mdct_double"; break;
        case AV_TX_DOUBLE_RDFT: name = "rdft_double"; break;
        case AV_TX_INT32_FFT:   name = "fft_int32";   break;
        case AV_TX_INT32_MDCT:  name = "mdct_int32";  break;
        case AV_TX_INT32_RDFT:  name = "rdft_int32";  break;
        default:                                      break;
        }
    }

    av_bprintf(bp, "%s", name);
}
335
print_cd_info(const FFTXCodelet * cd,int prio,int print_prio)336 static void print_cd_info(const FFTXCodelet *cd, int prio, int print_prio)
337 {
338 AVBPrint bp = { 0 };
339 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
340
341 av_bprintf(&bp, "%s - type: ", cd->name);
342
343 print_type(&bp, cd->type);
344
345 av_bprintf(&bp, ", len: ");
346 if (cd->min_len != cd->max_len)
347 av_bprintf(&bp, "[%i, ", cd->min_len);
348
349 if (cd->max_len == TX_LEN_UNLIMITED)
350 av_bprintf(&bp, "∞");
351 else
352 av_bprintf(&bp, "%i", cd->max_len);
353
354 av_bprintf(&bp, "%s, factors: [", cd->min_len != cd->max_len ? "]" : "");
355 for (int i = 0; i < TX_MAX_SUB; i++) {
356 if (i && cd->factors[i])
357 av_bprintf(&bp, ", ");
358 if (cd->factors[i] == TX_FACTOR_ANY)
359 av_bprintf(&bp, "any");
360 else if (cd->factors[i])
361 av_bprintf(&bp, "%i", cd->factors[i]);
362 else
363 break;
364 }
365
366 av_bprintf(&bp, "], ");
367 print_flags(&bp, cd->flags);
368
369 if (print_prio)
370 av_bprintf(&bp, ", prio: %i", prio);
371
372 av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str);
373 }
374
/**
 * Logs the codelet of a context followed, recursively, by those of all its
 * subtransforms, indenting each level by one more step.
 *
 * @param s     context to print; s->cd_self must be set
 * @param depth nesting level of s (0 for the root)
 */
static void print_tx_structure(AVTXContext *s, int depth)
{
    const FFTXCodelet *cd = s->cd_self;
    int indent = depth + 1;

    /* depth + 1 indentation steps, so the root is indented once as well */
    while (indent-- > 0)
        av_log(NULL, AV_LOG_VERBOSE, " ");

    print_cd_info(cd, cd->prio, 0);

    for (int child = 0; child < s->nb_sub; child++)
        print_tx_structure(&s->sub[child], depth + 1);
}
#endif /* !CONFIG_SMALL */
388
389 typedef struct TXCodeletMatch {
390 const FFTXCodelet *cd;
391 int prio;
392 } TXCodeletMatch;
393
/* Sort comparator: orders matches by descending priority */
static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
{
    const int prio_a = a->prio;
    const int prio_b = b->prio;

    return FFDIFFSIGN(prio_b, prio_a);
}
398
399 /* We want all factors to completely cover the length */
check_cd_factors(const FFTXCodelet * cd,int len)400 static inline int check_cd_factors(const FFTXCodelet *cd, int len)
401 {
402 int all_flag = 0;
403
404 for (int i = 0; i < TX_MAX_SUB; i++) {
405 int factor = cd->factors[i];
406
407 /* Conditions satisfied */
408 if (len == 1)
409 return 1;
410
411 /* No more factors */
412 if (!factor) {
413 break;
414 } else if (factor == TX_FACTOR_ANY) {
415 all_flag = 1;
416 continue;
417 }
418
419 if (factor == 2) { /* Fast path */
420 int bits_2 = ff_ctz(len);
421 if (!bits_2)
422 return 0; /* Factor not supported */
423
424 len >>= bits_2;
425 } else {
426 int res = len % factor;
427 if (res)
428 return 0; /* Factor not supported */
429
430 while (!res) {
431 len /= factor;
432 res = len % factor;
433 }
434 }
435 }
436
437 return all_flag || (len == 1);
438 }
439
ff_tx_init_subtx(AVTXContext * s,enum AVTXType type,uint64_t flags,FFTXCodeletOptions * opts,int len,int inv,const void * scale)440 av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
441 uint64_t flags, FFTXCodeletOptions *opts,
442 int len, int inv, const void *scale)
443 {
444 int ret = 0;
445 AVTXContext *sub = NULL;
446 TXCodeletMatch *cd_tmp, *cd_matches = NULL;
447 unsigned int cd_matches_size = 0;
448 int nb_cd_matches = 0;
449 #if !CONFIG_SMALL
450 AVBPrint bp = { 0 };
451 #endif
452
453 /* Array of all compiled codelet lists. Order is irrelevant. */
454 const FFTXCodelet * const * const codelet_list[] = {
455 ff_tx_codelet_list_float_c,
456 ff_tx_codelet_list_double_c,
457 ff_tx_codelet_list_int32_c,
458 ff_tx_null_list,
459 #if HAVE_X86ASM
460 ff_tx_codelet_list_float_x86,
461 #endif
462 };
463 int codelet_list_num = FF_ARRAY_ELEMS(codelet_list);
464
465 /* We still accept functions marked with SLOW, even if the CPU is
466 * marked with the same flag, but we give them lower priority. */
467 const int cpu_flags = av_get_cpu_flags();
468 const int slow_mask = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE3SLOW |
469 AV_CPU_FLAG_ATOM | AV_CPU_FLAG_SSSE3SLOW |
470 AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER;
471
472 static const int slow_penalties[][2] = {
473 { AV_CPU_FLAG_SSE2SLOW, 1 + 64 },
474 { AV_CPU_FLAG_SSE3SLOW, 1 + 64 },
475 { AV_CPU_FLAG_SSSE3SLOW, 1 + 64 },
476 { AV_CPU_FLAG_ATOM, 1 + 128 },
477 { AV_CPU_FLAG_AVXSLOW, 1 + 128 },
478 { AV_CPU_FLAG_SLOW_GATHER, 1 + 32 },
479 };
480
481 /* Flags the transform wants */
482 uint64_t req_flags = flags;
483
484 /* Flags the codelet may require to be present */
485 uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE;
486
487 /* Unaligned codelets are compatible with the aligned flag */
488 if (req_flags & FF_TX_ALIGNED)
489 req_flags |= AV_TX_UNALIGNED;
490
491 /* If either flag is set, both are okay, so don't check for an exact match */
492 if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE))
493 req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE);
494 if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED))
495 req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED);
496
497 /* Loop through all codelets in all codelet lists to find matches
498 * to the requirements */
499 while (codelet_list_num--) {
500 const FFTXCodelet * const * list = codelet_list[codelet_list_num];
501 const FFTXCodelet *cd = NULL;
502
503 while ((cd = *list++)) {
504 int max_factor = 0;
505
506 /* Check if the type matches */
507 if (cd->type != TX_TYPE_ANY && type != cd->type)
508 continue;
509
510 /* Check direction for non-orthogonal codelets */
511 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
512 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
513 continue;
514
515 /* Check if the requested flags match from both sides */
516 if (((req_flags & cd->flags) != (req_flags)) ||
517 ((inv_req_mask & cd->flags) != (req_flags & inv_req_mask)))
518 continue;
519
520 /* Check if length is supported */
521 if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len)))
522 continue;
523
524 /* Check if the CPU supports the required ISA */
525 if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
526 !(cpu_flags & (cd->cpu_flags & ~slow_mask)))
527 continue;
528
529 /* Check for factors */
530 if (!check_cd_factors(cd, len))
531 continue;
532
533 /* Realloc array and append */
534 cd_tmp = av_fast_realloc(cd_matches, &cd_matches_size,
535 sizeof(*cd_tmp) * (nb_cd_matches + 1));
536 if (!cd_tmp) {
537 av_free(cd_matches);
538 return AVERROR(ENOMEM);
539 }
540
541 cd_matches = cd_tmp;
542 cd_matches[nb_cd_matches].cd = cd;
543 cd_matches[nb_cd_matches].prio = cd->prio;
544
545 /* If the CPU has a SLOW flag, and the instruction is also flagged
546 * as being slow for such, reduce its priority */
547 for (int i = 0; i < FF_ARRAY_ELEMS(slow_penalties); i++) {
548 if ((cpu_flags & cd->cpu_flags) & slow_penalties[i][0])
549 cd_matches[nb_cd_matches].prio -= slow_penalties[i][1];
550 }
551
552 /* Prioritize aligned-only codelets */
553 if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED))
554 cd_matches[nb_cd_matches].prio += 64;
555
556 /* Codelets for specific lengths are generally faster */
557 if ((len == cd->min_len) && (len == cd->max_len))
558 cd_matches[nb_cd_matches].prio += 64;
559
560 /* Forward-only or inverse-only transforms are generally better */
561 if ((cd->flags & (FF_TX_FORWARD_ONLY | FF_TX_INVERSE_ONLY)))
562 cd_matches[nb_cd_matches].prio += 64;
563
564 /* Larger factors are generally better */
565 for (int i = 0; i < TX_MAX_SUB; i++)
566 max_factor = FFMAX(cd->factors[i], max_factor);
567 if (max_factor)
568 cd_matches[nb_cd_matches].prio += 16*max_factor;
569
570 nb_cd_matches++;
571 }
572 }
573
574 #if !CONFIG_SMALL
575 /* Print debugging info */
576 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
577 av_bprintf(&bp, "For transform of length %i, %s, ", len,
578 inv ? "inverse" : "forward");
579 print_type(&bp, type);
580 av_bprintf(&bp, ", ");
581 print_flags(&bp, flags);
582 av_bprintf(&bp, ", found %i matches%s", nb_cd_matches,
583 nb_cd_matches ? ":" : ".");
584 #endif
585
586 /* No matches found */
587 if (!nb_cd_matches)
588 return AVERROR(ENOSYS);
589
590 /* Sort the list */
591 AV_QSORT(cd_matches, nb_cd_matches, TXCodeletMatch, cmp_matches);
592
593 #if !CONFIG_SMALL
594 av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str);
595
596 for (int i = 0; i < nb_cd_matches; i++) {
597 av_log(NULL, AV_LOG_VERBOSE, " %i: ", i + 1);
598 print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 1);
599 }
600 #endif
601
602 if (!s->sub) {
603 s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub));
604 if (!sub) {
605 ret = AVERROR(ENOMEM);
606 goto end;
607 }
608 }
609
610 /* Attempt to initialize each */
611 for (int i = 0; i < nb_cd_matches; i++) {
612 const FFTXCodelet *cd = cd_matches[i].cd;
613 AVTXContext *sctx = &s->sub[s->nb_sub];
614
615 sctx->len = len;
616 sctx->inv = inv;
617 sctx->type = type;
618 sctx->flags = flags;
619 sctx->cd_self = cd;
620
621 s->fn[s->nb_sub] = cd->function;
622 s->cd[s->nb_sub] = cd;
623
624 ret = 0;
625 if (cd->init)
626 ret = cd->init(sctx, cd, flags, opts, len, inv, scale);
627
628 if (ret >= 0) {
629 s->nb_sub++;
630 goto end;
631 }
632
633 s->fn[s->nb_sub] = NULL;
634 s->cd[s->nb_sub] = NULL;
635
636 reset_ctx(sctx);
637 if (ret == AVERROR(ENOMEM))
638 break;
639 }
640
641 if (!s->nb_sub)
642 av_freep(&s->sub);
643
644 end:
645 av_free(cd_matches);
646 return ret;
647 }
648
av_tx_init(AVTXContext ** ctx,av_tx_fn * tx,enum AVTXType type,int inv,int len,const void * scale,uint64_t flags)649 av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
650 int inv, int len, const void *scale, uint64_t flags)
651 {
652 int ret;
653 AVTXContext tmp = { 0 };
654 const double default_scale_d = 1.0;
655 const float default_scale_f = 1.0f;
656
657 if (!len || type >= AV_TX_NB || !ctx || !tx)
658 return AVERROR(EINVAL);
659
660 if (!(flags & AV_TX_UNALIGNED))
661 flags |= FF_TX_ALIGNED;
662 if (!(flags & AV_TX_INPLACE))
663 flags |= FF_TX_OUT_OF_PLACE;
664
665 if (!scale && ((type == AV_TX_FLOAT_MDCT) || (type == AV_TX_INT32_MDCT)))
666 scale = &default_scale_f;
667 else if (!scale && (type == AV_TX_DOUBLE_MDCT))
668 scale = &default_scale_d;
669
670 ret = ff_tx_init_subtx(&tmp, type, flags, NULL, len, inv, scale);
671 if (ret < 0)
672 return ret;
673
674 *ctx = &tmp.sub[0];
675 *tx = tmp.fn[0];
676
677 #if !CONFIG_SMALL
678 av_log(NULL, AV_LOG_VERBOSE, "Transform tree:\n");
679 print_tx_structure(*ctx, 0);
680 #endif
681
682 return ret;
683 }
684