• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "cpu.h"
20 #include "qsort.h"
21 #include "bprint.h"
22 
23 #include "tx_priv.h"
24 
/* Evaluates to non-zero when x equals the float, double or int32 flavour
 * of the transform kind given by `type` (pasted onto the AV_TX_*_ prefix).
 * Note that x is evaluated up to three times. */
#define TYPE_IS(type, x)                 \
    (((x) == AV_TX_FLOAT_ ## type)    || \
     ((x) == AV_TX_DOUBLE_ ## type)   || \
     ((x) == AV_TX_INT32_ ## type))
29 
30 /* Calculates the modular multiplicative inverse */
mulinv(int n,int m)31 static av_always_inline int mulinv(int n, int m)
32 {
33     n = n % m;
34     for (int x = 1; x < m; x++)
35         if (((n * x) % m) == 1)
36             return x;
37     av_assert0(0); /* Never reached */
38     return 0;
39 }
40 
41 /* Guaranteed to work for any n, m where gcd(n, m) == 1 */
ff_tx_gen_compound_mapping(AVTXContext * s,int n,int m)42 int ff_tx_gen_compound_mapping(AVTXContext *s, int n, int m)
43 {
44     int *in_map, *out_map;
45     const int inv = s->inv;
46     const int len = n*m;    /* Will not be equal to s->len for MDCTs */
47     const int mdct = TYPE_IS(MDCT, s->type);
48     int m_inv, n_inv;
49 
50     /* Make sure the numbers are coprime */
51     if (av_gcd(n, m) != 1)
52         return AVERROR(EINVAL);
53 
54     m_inv = mulinv(m, n);
55     n_inv = mulinv(n, m);
56 
57     if (!(s->map = av_malloc(2*len*sizeof(*s->map))))
58         return AVERROR(ENOMEM);
59 
60     in_map  = s->map;
61     out_map = s->map + len;
62 
63     /* Ruritanian map for input, CRT map for output, can be swapped */
64     for (int j = 0; j < m; j++) {
65         for (int i = 0; i < n; i++) {
66             /* Shifted by 1 to simplify MDCTs */
67             in_map[j*n + i] = ((i*m + j*n) % len) << mdct;
68             out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
69         }
70     }
71 
72     /* Change transform direction by reversing all ACs */
73     if (inv) {
74         for (int i = 0; i < m; i++) {
75             int *in = &in_map[i*n + 1]; /* Skip the DC */
76             for (int j = 0; j < ((n - 1) >> 1); j++)
77                 FFSWAP(int, in[j], in[n - j - 2]);
78         }
79     }
80 
81     /* Our 15-point transform is also a compound one, so embed its input map */
82     if (n == 15) {
83         for (int k = 0; k < m; k++) {
84             int tmp[15];
85             memcpy(tmp, &in_map[k*15], 15*sizeof(*tmp));
86             for (int i = 0; i < 5; i++) {
87                 for (int j = 0; j < 3; j++)
88                     in_map[k*15 + i*3 + j] = tmp[(i*3 + j*5) % 15];
89             }
90         }
91     }
92 
93     return 0;
94 }
95 
/* Recursively computes the split-radix permutation value for index i of a
 * transform of length len. inv selects the direction and flips the sign of
 * the +/-1 adjustment applied in the odd (quarter-length) branches. */
static inline int split_radix_permutation(int i, int len, int inv)
{
    const int half = len >> 1;
    int quarter;

    if (half <= 1)
        return i & 1;

    /* Index falls in the even (half-length) part */
    if ((i & half) == 0)
        return 2 * split_radix_permutation(i, half, inv);

    /* Index falls in one of the two odd (quarter-length) parts */
    quarter = half >> 1;
    return 4 * split_radix_permutation(i, quarter, inv) + 1
           - 2 * (!(i & quarter) ^ inv);
}
106 
/* Allocates s->map (s->len entries) and fills it from the split-radix
 * permutation. With invert_lookup set, entry i holds the permuted index of
 * i; otherwise the entry at the permuted index holds i.
 * Returns 0 on success or AVERROR(ENOMEM). */
int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
{
    const int len  = s->len;
    const int mask = len - 1;

    s->map = av_malloc(len*sizeof(*s->map));
    if (!s->map)
        return AVERROR(ENOMEM);

    for (int i = 0; i < len; i++) {
        const int k = -split_radix_permutation(i, len, s->inv) & mask;

        if (invert_lookup)
            s->map[i] = k;
        else
            s->map[k] = i;
    }

    return 0;
}
124 
/* Builds, in s->map, a zero-terminated list of starting indices, one per
 * permutation loop of the first subtransform's map (s->sub->map), so that
 * each loop is entered exactly once.
 * Returns 0 on success, AVERROR(EINVAL) if there is no subtransform map and
 * AVERROR(ENOMEM) if the allocation fails. */
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
{
    const int len = s->len;
    const int *perm;
    int nb_starts = 0;

    if (!s->sub || !s->sub->map)
        return AVERROR(EINVAL);

    s->map = av_mallocz(len*sizeof(*s->map));
    if (!s->map)
        return AVERROR(ENOMEM);

    perm = s->sub->map;

    /* The first coefficient is always already in-place */
    for (int src = 1; src < len; src++) {
        int dst  = perm[src];
        int seen = 0;

        if (dst <= src)
            continue;

        /* Walk the loop starting at src. If any visited index is already
         * recorded as a loop start, this loop has been handled before and
         * must be skipped, since a loop may only be entered once. */
        for (;;) {
            for (int j = 0; j < nb_starts; j++) {
                if (s->map[j] == dst) {
                    seen = 1;
                    break;
                }
            }
            dst = perm[dst];
            if (dst == src || seen)
                break;
        }

        if (!seen)
            s->map[nb_starts++] = src;
    }

    /* Terminate the list */
    s->map[nb_starts++] = 0;

    return 0;
}
166 
/* Recursive worker for ff_tx_gen_split_radix_parity_revtab().
 * Splits the range starting at offset into one half-length and two
 * quarter-length parts until half the current length is no larger than
 * basis, then writes the even/odd permuted indices for that leaf into
 * revtab. is_dual/dual_high mark the two quarter-length parts, and
 * dual_stride controls how their entries are interleaved. inv_lookup
 * selects whether revtab is indexed by position or by permuted index. */
static void parity_revtab_generator(int *revtab, int n, int inv, int offset,
                                    int is_dual, int dual_high, int len,
                                    int basis, int dual_stride, int inv_lookup)
{
    const int half = len >> 1;
    int dual, high, stride, even_idx, odd_idx;

    if (half > basis) {
        const int quarter = half >> 1;

        parity_revtab_generator(revtab, n, inv, offset,
                                0, 0, half, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + half,
                                1, 0, quarter, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + half + quarter,
                                1, 1, quarter, basis, dual_stride, inv_lookup);
        return;
    }

    /* Leaf: dual handling only applies when a dual stride was requested */
    dual   = is_dual && dual_stride;
    high   = dual & dual_high;
    stride = dual ? FFMIN(dual_stride, half) : 0;

    even_idx = offset + high*(stride - 2*half);
    odd_idx  = even_idx + half + (dual && !high)*half + high*half;

    for (int i = 0; i < half; i++) {
        const int k_even =
            -split_radix_permutation(offset + i*2 + 0, n, inv) & (n - 1);
        const int k_odd  =
            -split_radix_permutation(offset + i*2 + 1, n, inv) & (n - 1);

        if (inv_lookup) {
            revtab[even_idx++] = k_even;
            revtab[odd_idx++]  = k_odd;
        } else {
            revtab[k_even] = even_idx++;
            revtab[k_odd]  = odd_idx++;
        }

        /* After every full stride, skip over the other half's slots */
        if (stride && !((i + 1) % stride)) {
            even_idx += stride;
            odd_idx  += stride;
        }
    }
}
209 
/* Allocates s->map (s->len zeroed entries) and fills it through
 * parity_revtab_generator(), using half of `basis` as the leaf size.
 * dual_stride must be zero or a power of two, and no larger than the halved
 * basis (both asserted).
 * Returns 0 on success, AVERROR(EINVAL) if s->len is smaller than the halved
 * basis and AVERROR(ENOMEM) if the allocation fails. */
int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int invert_lookup,
                                        int basis, int dual_stride)
{
    const int len = s->len;
    const int inv = s->inv;

    /* Validate the parameters before allocating, so that a rejected request
     * does not leave a useless map attached to the context. */
    basis >>= 1;
    if (len < basis)
        return AVERROR(EINVAL);

    av_assert0(!dual_stride || !(dual_stride & (dual_stride - 1)));
    av_assert0(dual_stride <= basis);

    if (!(s->map = av_mallocz(len*sizeof(*s->map))))
        return AVERROR(ENOMEM);

    parity_revtab_generator(s->map, len, inv, 0, 0, 0, len,
                            basis, dual_stride, invert_lookup);

    return 0;
}
230 
/* Tears down a context in place: resets every initialized subcontext,
 * calls the codelet's uninit callback if there is one, frees the sub, map,
 * exp and tmp buffers and finally zeroes the structure. The context itself
 * is not freed. Does nothing when s is NULL. */
static void reset_ctx(AVTXContext *s)
{
    if (!s)
        return;

    if (s->sub)
        for (int i = 0; i < s->nb_sub; i++)
            reset_ctx(&s->sub[i]);

    /* A zeroed context has no codelet attached, so guard the dereference */
    if (s->cd_self && s->cd_self->uninit)
        s->cd_self->uninit(s);

    av_freep(&s->sub);
    av_freep(&s->map);
    av_freep(&s->exp);
    av_freep(&s->tmp);

    memset(s, 0, sizeof(*s));
}
250 
av_tx_uninit(AVTXContext ** ctx)251 av_cold void av_tx_uninit(AVTXContext **ctx)
252 {
253     if (!(*ctx))
254         return;
255 
256     reset_ctx(*ctx);
257     av_freep(ctx);
258 }
259 
ff_tx_null_init(AVTXContext * s,const FFTXCodelet * cd,uint64_t flags,FFTXCodeletOptions * opts,int len,int inv,const void * scale)260 static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd,
261                                    uint64_t flags, FFTXCodeletOptions *opts,
262                                    int len, int inv, const void *scale)
263 {
264     /* Can only handle one sample+type to one sample+type transforms */
265     if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type))
266         return AVERROR(EINVAL);
267     return 0;
268 }
269 
270 /* Null transform when the length is 1 */
ff_tx_null(AVTXContext * s,void * _out,void * _in,ptrdiff_t stride)271 static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
272 {
273     memcpy(_out, _in, stride);
274 }
275 
276 static const FFTXCodelet ff_tx_null_def = {
277     .name       = NULL_IF_CONFIG_SMALL("null"),
278     .function   = ff_tx_null,
279     .type       = TX_TYPE_ANY,
280     .flags      = AV_TX_UNALIGNED | FF_TX_ALIGNED |
281                   FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
282     .factors[0] = TX_FACTOR_ANY,
283     .min_len    = 1,
284     .max_len    = 1,
285     .init       = ff_tx_null_init,
286     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,
287     .prio       = FF_TX_PRIO_MAX,
288 };
289 
290 static const FFTXCodelet * const ff_tx_null_list[] = {
291     &ff_tx_null_def,
292     NULL,
293 };
294 
295 #if !CONFIG_SMALL
/* Appends "flags: [a, b, ...]" to bp, listing the name of every known flag
 * set in f, separated by ", ". */
static void print_flags(AVBPrint *bp, uint64_t f)
{
    const char *sep = ", ";
    int nb_printed = 0; /* number of flag names emitted so far */

    av_bprintf(bp, "flags: [");
    if (f & FF_TX_ALIGNED) {
        nb_printed++;
        av_bprintf(bp, "aligned");
    }
    if (f & AV_TX_UNALIGNED) {
        nb_printed++;
        av_bprintf(bp, "%sunaligned", nb_printed > 1 ? sep : "");
    }
    if (f & AV_TX_INPLACE) {
        nb_printed++;
        av_bprintf(bp, "%sinplace", nb_printed > 1 ? sep : "");
    }
    if (f & FF_TX_OUT_OF_PLACE) {
        nb_printed++;
        av_bprintf(bp, "%sout_of_place", nb_printed > 1 ? sep : "");
    }
    if (f & FF_TX_FORWARD_ONLY) {
        nb_printed++;
        av_bprintf(bp, "%sfwd_only", nb_printed > 1 ? sep : "");
    }
    if (f & FF_TX_INVERSE_ONLY) {
        nb_printed++;
        av_bprintf(bp, "%sinv_only", nb_printed > 1 ? sep : "");
    }
    if (f & FF_TX_PRESHUFFLE) {
        nb_printed++;
        av_bprintf(bp, "%spreshuf", nb_printed > 1 ? sep : "");
    }
    if (f & AV_TX_FULL_IMDCT) {
        nb_printed++;
        av_bprintf(bp, "%simdct_full", nb_printed > 1 ? sep : "");
    }
    av_bprintf(bp, "]");
}
319 
/* Appends a short textual name for the transform type to bp, or "unknown"
 * when the type is not one of the listed values. */
static void print_type(AVBPrint *bp, enum AVTXType type)
{
    static const struct {
        enum AVTXType type;
        const char   *name;
    } type_names[] = {
        { TX_TYPE_ANY,       "any"         },
        { AV_TX_FLOAT_FFT,   "fft_float"   },
        { AV_TX_FLOAT_MDCT,  "mdct_float"  },
        { AV_TX_FLOAT_RDFT,  "rdft_float"  },
        { AV_TX_DOUBLE_FFT,  "fft_double"  },
        { AV_TX_DOUBLE_MDCT, "mdct_double" },
        { AV_TX_DOUBLE_RDFT, "rdft_double" },
        { AV_TX_INT32_FFT,   "fft_int32"   },
        { AV_TX_INT32_MDCT,  "mdct_int32"  },
        { AV_TX_INT32_RDFT,  "rdft_int32"  },
    };
    const char *name = "unknown";

    /* First match wins, in the order listed above */
    for (int i = 0; i < FF_ARRAY_ELEMS(type_names); i++) {
        if (type == type_names[i].type) {
            name = type_names[i].name;
            break;
        }
    }

    av_bprintf(bp, "%s", name);
}
335 
print_cd_info(const FFTXCodelet * cd,int prio,int print_prio)336 static void print_cd_info(const FFTXCodelet *cd, int prio, int print_prio)
337 {
338     AVBPrint bp = { 0 };
339     av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
340 
341     av_bprintf(&bp, "%s - type: ", cd->name);
342 
343     print_type(&bp, cd->type);
344 
345     av_bprintf(&bp, ", len: ");
346     if (cd->min_len != cd->max_len)
347         av_bprintf(&bp, "[%i, ", cd->min_len);
348 
349     if (cd->max_len == TX_LEN_UNLIMITED)
350         av_bprintf(&bp, "∞");
351     else
352         av_bprintf(&bp, "%i", cd->max_len);
353 
354     av_bprintf(&bp, "%s, factors: [", cd->min_len != cd->max_len ? "]" : "");
355     for (int i = 0; i < TX_MAX_SUB; i++) {
356         if (i && cd->factors[i])
357             av_bprintf(&bp, ", ");
358         if (cd->factors[i] == TX_FACTOR_ANY)
359             av_bprintf(&bp, "any");
360         else if (cd->factors[i])
361             av_bprintf(&bp, "%i", cd->factors[i]);
362         else
363             break;
364     }
365 
366     av_bprintf(&bp, "], ");
367     print_flags(&bp, cd->flags);
368 
369     if (print_prio)
370         av_bprintf(&bp, ", prio: %i", prio);
371 
372     av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str);
373 }
374 
/* Logs the codelet of s, indented by depth + 1 levels of four spaces, then
 * recurses into each of its subtransforms with one more level. */
static void print_tx_structure(AVTXContext *s, int depth)
{
    const FFTXCodelet *codelet = s->cd_self;

    for (int level = depth; level >= 0; level--)
        av_log(NULL, AV_LOG_VERBOSE, "    ");

    print_cd_info(codelet, codelet->prio, 0);

    for (int child = 0; child < s->nb_sub; child++)
        print_tx_structure(&s->sub[child], depth + 1);
}
387 #endif /* CONFIG_SMALL */
388 
389 typedef struct TXCodeletMatch {
390     const FFTXCodelet *cd;
391     int prio;
392 } TXCodeletMatch;
393 
/* Sort comparator ordering matches by descending priority */
static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
{
    const int prio_a = a->prio;
    const int prio_b = b->prio;

    return FFDIFFSIGN(prio_b, prio_a);
}
398 
399 /* We want all factors to completely cover the length */
check_cd_factors(const FFTXCodelet * cd,int len)400 static inline int check_cd_factors(const FFTXCodelet *cd, int len)
401 {
402     int all_flag = 0;
403 
404     for (int i = 0; i < TX_MAX_SUB; i++) {
405         int factor = cd->factors[i];
406 
407         /* Conditions satisfied */
408         if (len == 1)
409             return 1;
410 
411         /* No more factors */
412         if (!factor) {
413             break;
414         } else if (factor == TX_FACTOR_ANY) {
415             all_flag = 1;
416             continue;
417         }
418 
419         if (factor == 2) { /* Fast path */
420             int bits_2 = ff_ctz(len);
421             if (!bits_2)
422                 return 0; /* Factor not supported */
423 
424             len >>= bits_2;
425         } else {
426             int res = len % factor;
427             if (res)
428                 return 0; /* Factor not supported */
429 
430             while (!res) {
431                 len /= factor;
432                 res = len % factor;
433             }
434         }
435     }
436 
437     return all_flag || (len == 1);
438 }
439 
ff_tx_init_subtx(AVTXContext * s,enum AVTXType type,uint64_t flags,FFTXCodeletOptions * opts,int len,int inv,const void * scale)440 av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
441                              uint64_t flags, FFTXCodeletOptions *opts,
442                              int len, int inv, const void *scale)
443 {
444     int ret = 0;
445     AVTXContext *sub = NULL;
446     TXCodeletMatch *cd_tmp, *cd_matches = NULL;
447     unsigned int cd_matches_size = 0;
448     int nb_cd_matches = 0;
449 #if !CONFIG_SMALL
450     AVBPrint bp = { 0 };
451 #endif
452 
453     /* Array of all compiled codelet lists. Order is irrelevant. */
454     const FFTXCodelet * const * const codelet_list[] = {
455         ff_tx_codelet_list_float_c,
456         ff_tx_codelet_list_double_c,
457         ff_tx_codelet_list_int32_c,
458         ff_tx_null_list,
459 #if HAVE_X86ASM
460         ff_tx_codelet_list_float_x86,
461 #endif
462     };
463     int codelet_list_num = FF_ARRAY_ELEMS(codelet_list);
464 
465     /* We still accept functions marked with SLOW, even if the CPU is
466      * marked with the same flag, but we give them lower priority. */
467     const int cpu_flags = av_get_cpu_flags();
468     const int slow_mask = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE3SLOW  |
469                           AV_CPU_FLAG_ATOM     | AV_CPU_FLAG_SSSE3SLOW |
470                           AV_CPU_FLAG_AVXSLOW  | AV_CPU_FLAG_SLOW_GATHER;
471 
472     static const int slow_penalties[][2] = {
473         { AV_CPU_FLAG_SSE2SLOW,    1 + 64  },
474         { AV_CPU_FLAG_SSE3SLOW,    1 + 64  },
475         { AV_CPU_FLAG_SSSE3SLOW,   1 + 64  },
476         { AV_CPU_FLAG_ATOM,        1 + 128 },
477         { AV_CPU_FLAG_AVXSLOW,     1 + 128 },
478         { AV_CPU_FLAG_SLOW_GATHER, 1 + 32  },
479     };
480 
481     /* Flags the transform wants */
482     uint64_t req_flags = flags;
483 
484     /* Flags the codelet may require to be present */
485     uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE;
486 
487     /* Unaligned codelets are compatible with the aligned flag */
488     if (req_flags & FF_TX_ALIGNED)
489         req_flags |= AV_TX_UNALIGNED;
490 
491     /* If either flag is set, both are okay, so don't check for an exact match */
492     if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE))
493         req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE);
494     if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED))
495         req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED);
496 
497     /* Loop through all codelets in all codelet lists to find matches
498      * to the requirements */
499     while (codelet_list_num--) {
500         const FFTXCodelet * const * list = codelet_list[codelet_list_num];
501         const FFTXCodelet *cd = NULL;
502 
503         while ((cd = *list++)) {
504             int max_factor = 0;
505 
506             /* Check if the type matches */
507             if (cd->type != TX_TYPE_ANY && type != cd->type)
508                 continue;
509 
510             /* Check direction for non-orthogonal codelets */
511             if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
512                 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
513                 continue;
514 
515             /* Check if the requested flags match from both sides */
516             if (((req_flags    & cd->flags) != (req_flags)) ||
517                 ((inv_req_mask & cd->flags) != (req_flags & inv_req_mask)))
518                 continue;
519 
520             /* Check if length is supported */
521             if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len)))
522                 continue;
523 
524             /* Check if the CPU supports the required ISA */
525             if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
526                 !(cpu_flags & (cd->cpu_flags & ~slow_mask)))
527                 continue;
528 
529             /* Check for factors */
530             if (!check_cd_factors(cd, len))
531                 continue;
532 
533             /* Realloc array and append */
534             cd_tmp = av_fast_realloc(cd_matches, &cd_matches_size,
535                                      sizeof(*cd_tmp) * (nb_cd_matches + 1));
536             if (!cd_tmp) {
537                 av_free(cd_matches);
538                 return AVERROR(ENOMEM);
539             }
540 
541             cd_matches                     = cd_tmp;
542             cd_matches[nb_cd_matches].cd   = cd;
543             cd_matches[nb_cd_matches].prio = cd->prio;
544 
545             /* If the CPU has a SLOW flag, and the instruction is also flagged
546              * as being slow for such, reduce its priority */
547             for (int i = 0; i < FF_ARRAY_ELEMS(slow_penalties); i++) {
548                 if ((cpu_flags & cd->cpu_flags) & slow_penalties[i][0])
549                     cd_matches[nb_cd_matches].prio -= slow_penalties[i][1];
550             }
551 
552             /* Prioritize aligned-only codelets */
553             if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED))
554                 cd_matches[nb_cd_matches].prio += 64;
555 
556             /* Codelets for specific lengths are generally faster */
557             if ((len == cd->min_len) && (len == cd->max_len))
558                 cd_matches[nb_cd_matches].prio += 64;
559 
560             /* Forward-only or inverse-only transforms are generally better */
561             if ((cd->flags & (FF_TX_FORWARD_ONLY | FF_TX_INVERSE_ONLY)))
562                 cd_matches[nb_cd_matches].prio += 64;
563 
564             /* Larger factors are generally better */
565             for (int i = 0; i < TX_MAX_SUB; i++)
566                 max_factor = FFMAX(cd->factors[i], max_factor);
567             if (max_factor)
568                 cd_matches[nb_cd_matches].prio += 16*max_factor;
569 
570             nb_cd_matches++;
571         }
572     }
573 
574 #if !CONFIG_SMALL
575     /* Print debugging info */
576     av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
577     av_bprintf(&bp, "For transform of length %i, %s, ", len,
578                inv ? "inverse" : "forward");
579     print_type(&bp, type);
580     av_bprintf(&bp, ", ");
581     print_flags(&bp, flags);
582     av_bprintf(&bp, ", found %i matches%s", nb_cd_matches,
583                nb_cd_matches ? ":" : ".");
584 #endif
585 
586     /* No matches found */
587     if (!nb_cd_matches)
588         return AVERROR(ENOSYS);
589 
590     /* Sort the list */
591     AV_QSORT(cd_matches, nb_cd_matches, TXCodeletMatch, cmp_matches);
592 
593 #if !CONFIG_SMALL
594     av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str);
595 
596     for (int i = 0; i < nb_cd_matches; i++) {
597         av_log(NULL, AV_LOG_VERBOSE, "    %i: ", i + 1);
598         print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 1);
599     }
600 #endif
601 
602     if (!s->sub) {
603         s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub));
604         if (!sub) {
605             ret = AVERROR(ENOMEM);
606             goto end;
607         }
608     }
609 
610     /* Attempt to initialize each */
611     for (int i = 0; i < nb_cd_matches; i++) {
612         const FFTXCodelet *cd = cd_matches[i].cd;
613         AVTXContext *sctx = &s->sub[s->nb_sub];
614 
615         sctx->len        = len;
616         sctx->inv        = inv;
617         sctx->type       = type;
618         sctx->flags      = flags;
619         sctx->cd_self    = cd;
620 
621         s->fn[s->nb_sub] = cd->function;
622         s->cd[s->nb_sub] = cd;
623 
624         ret = 0;
625         if (cd->init)
626             ret = cd->init(sctx, cd, flags, opts, len, inv, scale);
627 
628         if (ret >= 0) {
629             s->nb_sub++;
630             goto end;
631         }
632 
633         s->fn[s->nb_sub] = NULL;
634         s->cd[s->nb_sub] = NULL;
635 
636         reset_ctx(sctx);
637         if (ret == AVERROR(ENOMEM))
638             break;
639     }
640 
641     if (!s->nb_sub)
642         av_freep(&s->sub);
643 
644 end:
645     av_free(cd_matches);
646     return ret;
647 }
648 
av_tx_init(AVTXContext ** ctx,av_tx_fn * tx,enum AVTXType type,int inv,int len,const void * scale,uint64_t flags)649 av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
650                        int inv, int len, const void *scale, uint64_t flags)
651 {
652     int ret;
653     AVTXContext tmp = { 0 };
654     const double default_scale_d = 1.0;
655     const float  default_scale_f = 1.0f;
656 
657     if (!len || type >= AV_TX_NB || !ctx || !tx)
658         return AVERROR(EINVAL);
659 
660     if (!(flags & AV_TX_UNALIGNED))
661         flags |= FF_TX_ALIGNED;
662     if (!(flags & AV_TX_INPLACE))
663         flags |= FF_TX_OUT_OF_PLACE;
664 
665     if (!scale && ((type == AV_TX_FLOAT_MDCT) || (type == AV_TX_INT32_MDCT)))
666         scale = &default_scale_f;
667     else if (!scale && (type == AV_TX_DOUBLE_MDCT))
668         scale = &default_scale_d;
669 
670     ret = ff_tx_init_subtx(&tmp, type, flags, NULL, len, inv, scale);
671     if (ret < 0)
672         return ret;
673 
674     *ctx = &tmp.sub[0];
675     *tx  = tmp.fn[0];
676 
677 #if !CONFIG_SMALL
678     av_log(NULL, AV_LOG_VERBOSE, "Transform tree:\n");
679     print_tx_structure(*ctx, 0);
680 #endif
681 
682     return ret;
683 }
684