1 /*
2 * Apple ProRes encoder
3 *
4 * Copyright (c) 2012 Konstantin Shishkov
5 *
 * This encoder appears to be based on Anatoliy Wasserman's encoder,
 * considering the similarities in the bugs.
8 *
9 * This file is part of FFmpeg.
10 *
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26 #include "libavutil/mem_internal.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
29 #include "avcodec.h"
30 #include "fdctdsp.h"
31 #include "put_bits.h"
32 #include "profiles.h"
33 #include "bytestream.h"
34 #include "internal.h"
35 #include "proresdata.h"
36
/* Values held in ProresContext.chroma_factor to identify the chroma layout. */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Largest number of macroblocks in one slice; sizes the per-plane block buffers. */
#define MAX_MBS_PER_SLICE 8

/* Upper bound on the number of planes; first dimension of the block buffers. */
#define MAX_PLANES 4
43
/*
 * Encoder profile selectors. The non-negative values are listed in the same
 * order as the entries of prores_profile_info[].
 */
enum {
    PRORES_PROFILE_AUTO     = -1,
    PRORES_PROFILE_PROXY    =  0,
    PRORES_PROFILE_LT       =  1,
    PRORES_PROFILE_STANDARD =  2,
    PRORES_PROFILE_HQ       =  3,
    PRORES_PROFILE_4444     =  4,
    PRORES_PROFILE_4444XQ   =  5,
};
53
/*
 * Quantisation matrix selectors, listed in the same order as the tables in
 * prores_quant_matrices[].
 */
enum {
    QUANT_MAT_PROXY        = 0,
    QUANT_MAT_PROXY_CHROMA = 1,
    QUANT_MAT_LT           = 2,
    QUANT_MAT_STANDARD     = 3,
    QUANT_MAT_HQ           = 4,
    QUANT_MAT_XQ_LUMA      = 5,
    QUANT_MAT_DEFAULT      = 6,
};
63
/*
 * Base quantisation matrices, 64 entries (8x8) each. The tables appear in the
 * same order as the QUANT_MAT_* enumerators; the encoder multiplies a selected
 * table by the slice quantiser to obtain the effective matrix.
 */
static const uint8_t prores_quant_matrices[][64] = {
    { // proxy
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // proxy chromas
         4,  7,  9, 11, 13, 14, 63, 63,
         7,  7, 11, 12, 14, 63, 63, 63,
         9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    },
    { // LT
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { // standard
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { // high quality
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { // XQ luma
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  3,
         2,  2,  2,  2,  2,  2,  3,  3,
         2,  2,  2,  2,  2,  3,  3,  3,
         2,  2,  2,  2,  3,  3,  3,  4,
         2,  2,  2,  2,  3,  3,  4,  4,
    },
    { // codec default
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
136
/*
 * Macroblock-count thresholds in ascending order; the comment on each entry
 * gives the largest frame size it covers. struct prores_profile carries one
 * br_tab[] value per threshold.
 */
#define NUM_MB_LIMITS 4
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
144
/*
 * Static description of each encoder profile. The entries are listed in the
 * same order as the non-negative PRORES_PROFILE_* enumerators.
 */
static const struct prores_profile {
    const char *full_name;          // human-readable profile name
    uint32_t tag;                   // four-character code built with MKTAG()
    int min_quant;                  // lowest quantiser evaluated by find_slice_quant()
    int max_quant;                  // highest regular quantiser evaluated by find_slice_quant()
    int br_tab[NUM_MB_LIMITS];      // one rate value per prores_mb_limits[] entry
                                    // NOTE(review): presumably a per-macroblock bit budget - confirm against the init code
    int quant;                      // QUANT_MAT_* selector for the luma matrix
    int quant_chroma;               // QUANT_MAT_* selector for the chroma matrix
} prores_profile_info[6] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = QUANT_MAT_PROXY,
        .quant_chroma = QUANT_MAT_PROXY_CHROMA,
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = QUANT_MAT_LT,
        .quant_chroma = QUANT_MAT_LT,
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = QUANT_MAT_STANDARD,
        .quant_chroma = QUANT_MAT_STANDARD,
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = QUANT_MAT_HQ,
        .quant_chroma = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444",
        .tag       = MKTAG('a', 'p', '4', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 2350, 1828, 1600, 1425 },
        .quant     = QUANT_MAT_HQ,
        .quant_chroma = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444XQ",
        .tag       = MKTAG('a', 'p', '4', 'x'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 3525, 2742, 2400, 2137 },
        .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
        .quant_chroma = QUANT_MAT_HQ,
    }
};
209
/* Number of trellis nodes reserved for each slice column (see find_quant_thread()). */
#define TRELLIS_WIDTH 16
/*
 * Score that marks a trellis path as unusable. The expansion is parenthesised
 * so the macro behaves as a single operand in any surrounding expression.
 */
#define SCORE_LIMIT (INT_MAX / 2)
212
/* One node of the per-row quantiser trellis filled in by find_slice_quant(). */
struct TrellisNode {
    int prev_node; // index of the predecessor node on the best path, -1 until one is chosen
    int quant;     // quantiser this node stands for
    int bits;      // bits consumed along the best path ending in this node
    int score;     // accumulated error score of that path (SCORE_LIMIT when unusable)
};
219
/*
 * Number of per-quantiser matrices kept in ProresContext.quants[] and
 * quants_chroma[]; for larger quantisers the matrix is built on the fly.
 */
#define MAX_STORED_Q 16
221
/* Scratch state selected by thread number in find_quant_thread(). */
typedef struct ProresThreadData {
    /* per-plane slice data written by get_slice_data()/get_alpha_data() */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* staging buffer for macroblocks that cross the right/bottom picture edge */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
    int16_t custom_q[64];        // luma matrix built for quantisers >= MAX_STORED_Q
    int16_t custom_chroma_q[64]; // chroma counterpart of custom_q
    struct TrellisNode *nodes;   // trellis nodes read and written by find_slice_quant()
} ProresThreadData;
229
/* Encoder private context (avctx->priv_data). */
typedef struct ProresContext {
    AVClass *class;
    /* slice data for the plane currently being written by encode_slice() (only [0] is used there) */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* staging buffer for macroblocks that cross the picture edge */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
    int16_t quants[MAX_STORED_Q][64];        // luma matrix per quantiser; entry 0 is used when force_quant is set
    int16_t quants_chroma[MAX_STORED_Q][64]; // chroma matrix per quantiser, same indexing as quants
    int16_t custom_q[64];                    // luma matrix built in encode_slice() for quantisers >= MAX_STORED_Q
    int16_t custom_chroma_q[64];             // chroma counterpart of custom_q
    const uint8_t *quant_mat;                // base luma matrix, scaled by the quantiser
    const uint8_t *quant_chroma_mat;         // base chroma matrix, scaled by the quantiser
    const uint8_t *scantable;                // coefficient scan order used by the AC coders

    /* forward transform called once per 8x8 block by get_slice_data() */
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
                 ptrdiff_t linesize, int16_t *block);
    FDCTDSPContext fdsp;

    const AVFrame *pic;                      // frame currently being encoded
    int mb_width, mb_height;                 // picture size in macroblocks
    int mbs_per_slice;                       // nominal slice width in macroblocks
    int num_chroma_blocks, chroma_factor;    // chroma_factor holds a CFACTOR_* value
    int slices_width;                        // number of slices per macroblock row
    int slices_per_picture;
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
    int cur_picture_idx;                     // index of the picture (field) being encoded
    int num_planes;
    int bits_per_mb;                         // bit budget per macroblock used by rate control
    int force_quant;                         // non-zero: use this quantiser for every slice
    int alpha_bits;                          // alpha sample depth; 0 when no alpha is estimated
    int warn;                                // set once the packet-size warning has been issued

    char *vendor;                            // first 4 bytes are written to the frame header
    int quant_sel;                           // QUANT_MAT_* selection

    int frame_size_upper_bound;              // packet size requested for each frame

    int profile;
    const struct prores_profile *profile_info;

    int *slice_q;                            // quantiser chosen for each slice of the picture

    ProresThreadData *tdata;                 // per-thread scratch data, indexed by thread number
} ProresContext;
272
/**
 * Transform the samples of one slice of a picture plane into 8x8 coefficient
 * blocks.
 *
 * @param ctx           encoder context providing the forward transform
 * @param src           first sample of the slice inside the plane
 * @param linesize      distance between two rows of src, in bytes
 * @param x, y          position of the slice inside the plane, in samples
 * @param w, h          plane dimensions, in samples
 * @param blocks        output, 64 coefficients per block
 * @param emu_buf       16x16 staging buffer for macroblocks crossing an edge
 * @param mbs_per_slice number of macroblocks in the slice
 * @param blocks_per_mb blocks per macroblock (more than 2 means two block columns)
 * @param is_chroma     selects column-major block order instead of row-major
 */
static void get_slice_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, uint16_t *emu_buf,
                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
{
    const int mb_width = 4 * blocks_per_mb;
    const int two_cols = blocks_per_mb > 2;
    int mb;

    for (mb = 0; mb < mbs_per_slice; mb++, src += mb_width, x += mb_width) {
        const uint16_t *esrc, *lower;
        ptrdiff_t elinesize;

        /* Macroblocks entirely right of the plane produce all-zero blocks. */
        if (x >= w) {
            memset(blocks, 0, 64 * (mbs_per_slice - mb) * blocks_per_mb
                              * sizeof(*blocks));
            return;
        }

        if (x + mb_width > w || y + 16 > h) {
            /* Partially outside: copy what exists, replicate the last
             * column to the right and the last row downwards. */
            const int bw = FFMIN(w - x, mb_width);
            const int bh = FFMIN(h - y, 16);
            int row, col;

            for (row = 0; row < bh; row++) {
                uint16_t *dst = emu_buf + row * 16;
                int pix;

                memcpy(dst, (const uint8_t*)src + row * linesize,
                       bw * sizeof(*src));
                pix = dst[bw - 1];
                for (col = bw; col < mb_width; col++)
                    dst[col] = pix;
            }
            for (; row < 16; row++)
                memcpy(emu_buf + row * 16,
                       emu_buf + (bh - 1) * 16,
                       mb_width * sizeof(*emu_buf));

            esrc      = emu_buf;
            elinesize = 16 * sizeof(*emu_buf);
        } else {
            esrc      = src;
            elinesize = linesize;
        }

        /* elinesize is in bytes while esrc counts 16-bit samples, so adding
         * elinesize * 4 advances by eight rows. */
        lower = esrc + elinesize * 4;

        if (is_chroma) {
            /* left column top-to-bottom, then the right column */
            ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
            blocks += 64;
            ctx->fdct(&ctx->fdsp, lower, elinesize, blocks);
            blocks += 64;
            if (two_cols) {
                ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
                blocks += 64;
                ctx->fdct(&ctx->fdsp, lower + 8, elinesize, blocks);
                blocks += 64;
            }
        } else {
            /* top row left-to-right, then the bottom row */
            ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
            blocks += 64;
            if (two_cols) {
                ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
                blocks += 64;
            }
            ctx->fdct(&ctx->fdsp, lower, elinesize, blocks);
            blocks += 64;
            if (two_cols) {
                ctx->fdct(&ctx->fdsp, lower + 8, elinesize, blocks);
                blocks += 64;
            }
        }
    }
}
343
/**
 * Copy the alpha samples of one slice into a 16-row buffer, rescaling them
 * to the coded alpha depth and padding past the plane edges.
 *
 * @param ctx           encoder context (not used here)
 * @param src           first alpha sample of the slice
 * @param linesize      distance between two rows of src, in bytes
 * @param x, y          position of the slice inside the plane, in samples
 * @param w, h          plane dimensions, in samples
 * @param blocks        output, 16 rows of 16 * mbs_per_slice samples
 * @param mbs_per_slice number of macroblocks in the slice
 * @param abits         coded alpha depth; 8 shifts samples right by two,
 *                      any other value expands them with (v << 6) | (v >> 4)
 */
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, int mbs_per_slice, int abits)
{
    const int slice_width = 16 * mbs_per_slice;
    const int copy_w      = FFMIN(w - x, slice_width);
    const int copy_h      = FFMIN(h - y, 16);
    int row, col;

    for (row = 0; row < copy_h;
         row++, blocks += slice_width, src += linesize >> 1) {
        memcpy(blocks, src, copy_w * sizeof(*src));

        for (col = 0; col < copy_w; col++) {
            if (abits == 8)
                blocks[col] >>= 2;
            else
                blocks[col] = (blocks[col] << 6) | (blocks[col] >> 4);
        }

        /* replicate the last valid sample up to the slice width */
        for (col = copy_w; col < slice_width; col++)
            blocks[col] = blocks[copy_w - 1];
    }

    /* rows below the plane repeat the row above them */
    for (; row < 16; row++, blocks += slice_width)
        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
}
371
372 /**
373 * Write an unsigned rice/exp golomb codeword.
374 */
encode_vlc_codeword(PutBitContext * pb,unsigned codebook,int val)375 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
376 {
377 unsigned int rice_order, exp_order, switch_bits, switch_val;
378 int exponent;
379
380 /* number of prefix bits to switch between Rice and expGolomb */
381 switch_bits = (codebook & 3) + 1;
382 rice_order = codebook >> 5; /* rice code order */
383 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
384
385 switch_val = switch_bits << rice_order;
386
387 if (val >= switch_val) {
388 val -= switch_val - (1 << exp_order);
389 exponent = av_log2(val);
390
391 put_bits(pb, exponent - exp_order + switch_bits, 0);
392 put_bits(pb, exponent + 1, val);
393 } else {
394 exponent = val >> rice_order;
395
396 if (exponent)
397 put_bits(pb, exponent, 0);
398 put_bits(pb, 1, 1);
399 if (rice_order)
400 put_sbits(pb, rice_order, val);
401 }
402 }
403
/* Sign mask of a 32-bit int: -1 for negative values, 0 otherwise (relies on an arithmetic right shift). */
#define GET_SIGN(x)  ((x) >> 31)
/* Fold a signed value into a non-negative code: 2 * x for x >= 0, -2 * x - 1 for x < 0. */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
406
/**
 * Write the DC coefficients of all blocks of a slice plane.
 *
 * The first DC is coded on its own with FIRST_DC_CB; each later one is coded
 * as a difference to its predecessor, with the difference's sign flipped
 * according to the sign of the previous difference and the codebook chosen
 * from the previous code.
 *
 * @param pb               bit writer
 * @param blocks           coefficient blocks, 64 entries each (DC first)
 * @param blocks_per_slice number of blocks to code
 * @param scale            DC quantiser step
 */
static void encode_dcs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice, int scale)
{
    int prev_dc   = (blocks[0] - 0x4000) / scale;
    int prev_sign = 0;
    int codebook  = 3;
    int n;

    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));

    for (n = 1; n < blocks_per_slice; n++) {
        const int dc       = (blocks[n * 64] - 0x4000) / scale;
        const int raw      = dc - prev_dc;
        const int cur_sign = GET_SIGN(raw);
        const int delta    = (raw ^ prev_sign) - prev_sign;
        const int code     = MAKE_CODE(delta);

        encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);

        codebook  = (code + (code & 1)) >> 1;
        codebook  = FFMIN(codebook, 3);
        prev_sign = cur_sign;
        prev_dc   = dc;
    }
}
432
/**
 * Write the AC coefficients of a slice plane as (run, level, sign) triples.
 *
 * Coefficients are visited one scan position at a time across all blocks of
 * the slice, so a run of zeros may continue from one block into the next.
 *
 * @param pb                bit writer
 * @param blocks            coefficient blocks, 64 entries each
 * @param blocks_per_slice  number of blocks in the slice plane
 * @param plane_size_factor not used here
 * @param scan              scan order (position -> coefficient index)
 * @param qmat              quantiser steps indexed by coefficient index
 */
static void encode_acs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice,
                       int plane_size_factor,
                       const uint8_t *scan, const int16_t *qmat)
{
    const int max_coeffs = blocks_per_slice << 6;
    int run_cb = ff_prores_run_to_cb_index[4];
    int lev_cb = ff_prores_lev_to_cb_index[2];
    int run    = 0;
    int pos, idx;

    for (pos = 1; pos < 64; pos++) {
        const int coef = scan[pos];

        for (idx = coef; idx < max_coeffs; idx += 64) {
            const int level = blocks[idx] / qmat[coef];
            int abs_level;

            if (!level) {
                run++;
                continue;
            }

            abs_level = FFABS(level);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
                                abs_level - 1);
            put_sbits(pb, 1, GET_SIGN(level));

            /* the next codebooks depend on the values just written */
            run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
            lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
            run    = 0;
        }
    }
}
466
/**
 * Write one transformed plane of a slice (DCs, then ACs) and flush the
 * writer.
 *
 * @param qmat quantiser matrix; qmat[0] is the DC step
 * @return number of bytes added to pb for this plane
 */
static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
                              const uint16_t *src, ptrdiff_t linesize,
                              int mbs_per_slice, int16_t *blocks,
                              int blocks_per_mb, int plane_size_factor,
                              const int16_t *qmat)
{
    const int start_bits       = put_bits_count(pb);
    const int blocks_per_slice = mbs_per_slice * blocks_per_mb;

    encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
    encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
               ctx->scantable, qmat);
    flush_put_bits(pb);

    return (put_bits_count(pb) - start_bits) >> 3;
}
485
/**
 * Write the difference between two alpha samples.
 *
 * The difference is reduced modulo 2^abits and mapped to a signed value.
 * A non-zero difference whose magnitude is at most dsize is written in the
 * short form: flag 0, dbits - 1 magnitude bits, one sign bit. Anything else
 * is written as flag 1 followed by abits bits.
 */
static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    /* values in the top dsize entries of the range stand for negatives */
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff && diff >= -dsize && diff <= dsize) {
        put_bits(pb, 1, 0);
        put_bits(pb, dbits - 1, FFABS(diff) - 1);
        put_bits(pb, 1, diff < 0);
        return;
    }

    put_bits(pb, 1, 1);
    put_bits(pb, abits, diff);
}
504
/**
 * Write the length of a run of identical alpha samples: a single 1 bit for
 * an empty run, otherwise a 0 bit followed by the length in 4 bits (below
 * 16) or 15 bits.
 */
static void put_alpha_run(PutBitContext *pb, int run)
{
    if (!run) {
        put_bits(pb, 1, 1);
        return;
    }

    put_bits(pb, 1, 0);
    put_bits(pb, run < 0x10 ? 4 : 15, run);
}
517
518 // todo alpha quantisation for high quants
encode_alpha_plane(ProresContext * ctx,PutBitContext * pb,int mbs_per_slice,uint16_t * blocks,int quant)519 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
520 int mbs_per_slice, uint16_t *blocks,
521 int quant)
522 {
523 const int abits = ctx->alpha_bits;
524 const int mask = (1 << abits) - 1;
525 const int num_coeffs = mbs_per_slice * 256;
526 int saved_pos = put_bits_count(pb);
527 int prev = mask, cur;
528 int idx = 0;
529 int run = 0;
530
531 cur = blocks[idx++];
532 put_alpha_diff(pb, cur, prev, abits);
533 prev = cur;
534 do {
535 cur = blocks[idx++];
536 if (cur != prev) {
537 put_alpha_run (pb, run);
538 put_alpha_diff(pb, cur, prev, abits);
539 prev = cur;
540 run = 0;
541 } else {
542 run++;
543 }
544 } while (idx < num_coeffs);
545 if (run)
546 put_alpha_run(pb, run);
547 flush_put_bits(pb);
548 return (put_bits_count(pb) - saved_pos) >> 3;
549 }
550
/**
 * Encode every plane of one slice into pb.
 *
 * @param avctx         codec context; priv_data is the ProresContext
 * @param pic           frame being encoded
 * @param pb            bit writer positioned at the start of the slice data
 * @param sizes         receives the byte size of each encoded plane
 * @param x, y          slice position in macroblocks
 * @param quant         quantiser of the slice
 * @param mbs_per_slice number of macroblocks in the slice
 * @return total number of bytes written, or AVERROR_BUG when the writer ran
 *         out of space
 */
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
                        PutBitContext *pb,
                        int sizes[4], int x, int y, int quant,
                        int mbs_per_slice)
{
    ProresContext *ctx = avctx->priv_data;
    const int slice_width_factor = av_log2(mbs_per_slice);
    /* The matrices are int16_t arrays and are only read from here on. */
    const int16_t *qmat;
    const int16_t *qmat_chroma;
    int total_size = 0;
    int line_add;
    int i;

    /* With two pictures per frame, pick the row offset of the current field. */
    if (ctx->pictures_per_frame == 1)
        line_add = 0;
    else
        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;

    if (ctx->force_quant) {
        qmat        = ctx->quants[0];
        qmat_chroma = ctx->quants_chroma[0];
    } else if (quant < MAX_STORED_Q) {
        qmat        = ctx->quants[quant];
        qmat_chroma = ctx->quants_chroma[quant];
    } else {
        /* No stored matrix for this quantiser: scale the base matrices. */
        for (i = 0; i < 64; i++) {
            ctx->custom_q[i]        = ctx->quant_mat[i]        * quant;
            ctx->custom_chroma_q[i] = ctx->quant_chroma_mat[i] * quant;
        }
        qmat        = ctx->custom_q;
        qmat_chroma = ctx->custom_chroma_q;
    }

    for (i = 0; i < ctx->num_planes; i++) {
        const int is_chroma = (i == 1 || i == 2);
        const uint16_t *src;
        ptrdiff_t linesize;
        int plane_factor, num_cblocks, pwidth, xp, yp;

        plane_factor = slice_width_factor + 2;
        if (is_chroma)
            plane_factor += ctx->chroma_factor - 3;

        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
            xp          = x << 4;
            yp          = y << 4;
            num_cblocks = 4;
            pwidth      = avctx->width;
        } else {
            /* horizontally subsampled chroma: half width, two blocks per MB */
            xp          = x << 3;
            yp          = y << 4;
            num_cblocks = 2;
            pwidth      = avctx->width >> 1;
        }

        linesize = pic->linesize[i] * ctx->pictures_per_frame;
        src = (const uint16_t*)(pic->data[i] + yp * linesize +
                                line_add * pic->linesize[i]) + xp;

        if (i < 3) {
            get_slice_data(ctx, src, linesize, xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           ctx->blocks[0], ctx->emu_buf,
                           mbs_per_slice, num_cblocks, is_chroma);
            sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
                                          mbs_per_slice, ctx->blocks[0],
                                          num_cblocks, plane_factor,
                                          is_chroma ? qmat_chroma : qmat);
        } else {
            get_alpha_data(ctx, src, linesize, xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
            /* the alpha coder reads the same buffer as unsigned samples */
            sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
                                          (uint16_t *)ctx->blocks[0], quant);
        }

        total_size += sizes[i];
        if (put_bits_left(pb) < 0) {
            av_log(avctx, AV_LOG_ERROR,
                   "Underestimated required buffer size.\n");
            return AVERROR_BUG;
        }
    }

    return total_size;
}
640
/**
 * Return the number of bits encode_vlc_codeword() writes for val with the
 * given codebook, without writing anything.
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    /* number of prefix bits to switch between Rice and expGolomb */
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  =  codebook >> 5;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice form: unary quotient, stop bit, rice_order remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb form: zero prefix plus exponent + 1 value bits */
    val -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
662
estimate_dcs(int * error,int16_t * blocks,int blocks_per_slice,int scale)663 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
664 int scale)
665 {
666 int i;
667 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
668 int bits;
669
670 prev_dc = (blocks[0] - 0x4000) / scale;
671 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
672 sign = 0;
673 codebook = 3;
674 blocks += 64;
675 *error += FFABS(blocks[0] - 0x4000) % scale;
676
677 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
678 dc = (blocks[0] - 0x4000) / scale;
679 *error += FFABS(blocks[0] - 0x4000) % scale;
680 delta = dc - prev_dc;
681 new_sign = GET_SIGN(delta);
682 delta = (delta ^ sign) - sign;
683 code = MAKE_CODE(delta);
684 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
685 codebook = (code + (code & 1)) >> 1;
686 codebook = FFMIN(codebook, 3);
687 sign = new_sign;
688 prev_dc = dc;
689 }
690
691 return bits;
692 }
693
estimate_acs(int * error,int16_t * blocks,int blocks_per_slice,int plane_size_factor,const uint8_t * scan,const int16_t * qmat)694 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
695 int plane_size_factor,
696 const uint8_t *scan, const int16_t *qmat)
697 {
698 int idx, i;
699 int run, level, run_cb, lev_cb;
700 int max_coeffs, abs_level;
701 int bits = 0;
702
703 max_coeffs = blocks_per_slice << 6;
704 run_cb = ff_prores_run_to_cb_index[4];
705 lev_cb = ff_prores_lev_to_cb_index[2];
706 run = 0;
707
708 for (i = 1; i < 64; i++) {
709 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
710 level = blocks[idx] / qmat[scan[i]];
711 *error += FFABS(blocks[idx]) % qmat[scan[i]];
712 if (level) {
713 abs_level = FFABS(level);
714 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
715 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
716 abs_level - 1) + 1;
717
718 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
719 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
720 run = 0;
721 } else {
722 run++;
723 }
724 }
725 }
726
727 return bits;
728 }
729
estimate_slice_plane(ProresContext * ctx,int * error,int plane,const uint16_t * src,ptrdiff_t linesize,int mbs_per_slice,int blocks_per_mb,int plane_size_factor,const int16_t * qmat,ProresThreadData * td)730 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
731 const uint16_t *src, ptrdiff_t linesize,
732 int mbs_per_slice,
733 int blocks_per_mb, int plane_size_factor,
734 const int16_t *qmat, ProresThreadData *td)
735 {
736 int blocks_per_slice;
737 int bits;
738
739 blocks_per_slice = mbs_per_slice * blocks_per_mb;
740
741 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
742 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
743 plane_size_factor, ctx->scantable, qmat);
744
745 return FFALIGN(bits, 8);
746 }
747
/**
 * Return the number of bits put_alpha_diff() writes for the step from prev
 * to cur: dbits + 1 for the short form, abits + 1 otherwise.
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    /* values in the top dsize entries of the range stand for negatives */
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff && diff >= -dsize && diff <= dsize)
        return dbits + 1;

    return abits + 1;
}
762
/**
 * Estimate the number of bits encode_alpha_plane() would produce for one
 * slice.
 *
 * The estimate mirrors the writer: every run costs its flag bit plus, when
 * the run is not empty, a 4- or 15-bit length (see put_alpha_run()), and the
 * total is rounded up to a whole byte because the writer is flushed after
 * the plane.
 *
 * @param ctx           encoder context; alpha_bits selects the coded depth
 * @param src           not used here
 * @param linesize      not used here
 * @param mbs_per_slice number of macroblocks in the slice
 * @param blocks        256 * mbs_per_slice alpha samples
 * @return estimated size in bits, a multiple of 8
 */
static int estimate_alpha_plane(ProresContext *ctx,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice, int16_t *blocks)
{
    const int abits = ctx->alpha_bits;
    const int mask  = (1 << abits) - 1;
    const int num_coeffs = mbs_per_slice * 256;
    int prev = mask, cur;
    int idx = 0;
    int run = 0;
    int bits;

    cur  = blocks[idx++];
    bits = est_alpha_diff(cur, prev, abits);
    prev = cur;
    do {
        cur = blocks[idx++];
        if (cur != prev) {
            bits++;                          /* run flag bit */
            if (run)
                bits += run < 0x10 ? 4 : 15; /* run length field */
            bits += est_alpha_diff(cur, prev, abits);
            prev = cur;
            run  = 0;
        } else {
            run++;
        }
    } while (idx < num_coeffs);

    /* a trailing run is only coded when it is not empty */
    if (run)
        bits += 1 + (run < 0x10 ? 4 : 15);

    return FFALIGN(bits, 8);
}
804
/**
 * Evaluate the candidate quantisers of one slice and extend the quantiser
 * trellis by one column.
 *
 * The slice is transformed once, then its size and quantisation error are
 * estimated for every quantiser from the profile's min_quant to max_quant.
 * One extra candidate ("overquant", stored at column entry max_quant + 1)
 * is the first quantiser above max_quant, searched up to 127, whose
 * estimate fits the slice's bit budget; when max_quant already fits it is
 * max_quant itself. Each candidate is then linked to the predecessor in
 * the previous column that gives the lowest accumulated score within the
 * running bit limit.
 *
 * @param avctx         codec context; priv_data is the ProresContext
 * @param trellis_node  index in td->nodes of this slice's column; the
 *                      previous column starts TRELLIS_WIDTH entries earlier
 *                      and must already hold valid bits/score values
 * @param x, y          slice position in macroblocks
 * @param mbs_per_slice number of macroblocks in the slice
 * @param td            per-thread scratch data
 * @return index in td->nodes of the lowest-score node of this column (the
 *         highest quantiser wins ties)
 */
static int find_slice_quant(AVCodecContext *avctx,
                            int trellis_node, int x, int y, int mbs_per_slice,
                            ProresThreadData *td)
{
    ProresContext *ctx = avctx->priv_data;
    int i, q, pq, xp, yp;
    const uint16_t *src;
    int slice_width_factor = av_log2(mbs_per_slice);
    int num_cblocks[MAX_PLANES], pwidth;
    int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
    const int min_quant = ctx->profile_info->min_quant;
    const int max_quant = ctx->profile_info->max_quant;
    int error, bits, bits_limit;
    int mbs, prev, cur, new_score;
    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
    int overquant;
    /* The matrices are int16_t arrays and are only read through these. */
    const int16_t *qmat;
    const int16_t *qmat_chroma;
    int linesize[4], line_add;
    int alpha_bits = 0;

    /* With two pictures per frame, pick the row offset of the current field. */
    if (ctx->pictures_per_frame == 1)
        line_add = 0;
    else
        line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
    /* macroblocks of this row covered once this slice is included */
    mbs = x + mbs_per_slice;

    /* Fetch and transform every plane once; all quantisers reuse the result. */
    for (i = 0; i < ctx->num_planes; i++) {
        is_chroma[i]    = (i == 1 || i == 2);
        plane_factor[i] = slice_width_factor + 2;
        if (is_chroma[i])
            plane_factor[i] += ctx->chroma_factor - 3;
        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
            xp             = x << 4;
            yp             = y << 4;
            num_cblocks[i] = 4;
            pwidth         = avctx->width;
        } else {
            xp             = x << 3;
            yp             = y << 4;
            num_cblocks[i] = 2;
            pwidth         = avctx->width >> 1;
        }

        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
                                 line_add * ctx->pic->linesize[i]) + xp;

        if (i < 3) {
            get_slice_data(ctx, src, linesize[i], xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           td->blocks[i], td->emu_buf,
                           mbs_per_slice, num_cblocks[i], is_chroma[i]);
        } else {
            get_alpha_data(ctx, src, linesize[i], xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           td->blocks[i], mbs_per_slice, ctx->alpha_bits);
        }
    }

    /* Reset this column; entry max_quant + 1 is the overquant candidate. */
    for (q = min_quant; q < max_quant + 2; q++) {
        td->nodes[trellis_node + q].prev_node = -1;
        td->nodes[trellis_node + q].quant     = q;
    }

    /* The alpha estimate does not depend on the quantiser: do it once. */
    if (ctx->alpha_bits)
        alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
                                          mbs_per_slice, td->blocks[3]);
    // todo: maybe perform coarser quantising to fit into frame size when needed
    for (q = min_quant; q <= max_quant; q++) {
        bits  = alpha_bits;
        error = 0;
        bits += estimate_slice_plane(ctx, &error, 0,
                                     src, linesize[0],
                                     mbs_per_slice,
                                     num_cblocks[0], plane_factor[0],
                                     ctx->quants[q], td); /* estimate luma plane */
        for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
            bits += estimate_slice_plane(ctx, &error, i,
                                         src, linesize[i],
                                         mbs_per_slice,
                                         num_cblocks[i], plane_factor[i],
                                         ctx->quants_chroma[q], td);
        }
        /* slices estimated above 65000 bytes are never selected */
        if (bits > 65000 * 8)
            error = SCORE_LIMIT;

        slice_bits[q]  = bits;
        slice_score[q] = error;
    }
    if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
        /* max_quant already fits: the overquant entry duplicates it with a
         * slightly worse score. */
        slice_bits[max_quant + 1]  = slice_bits[max_quant];
        slice_score[max_quant + 1] = slice_score[max_quant] + 1;
        overquant = max_quant;
    } else {
        /* Raise the quantiser until the estimate fits the slice budget. */
        for (q = max_quant + 1; q < 128; q++) {
            bits  = alpha_bits;
            error = 0;
            if (q < MAX_STORED_Q) {
                qmat        = ctx->quants[q];
                qmat_chroma = ctx->quants_chroma[q];
            } else {
                /* no stored matrix: scale the base matrices */
                for (i = 0; i < 64; i++) {
                    td->custom_q[i]        = ctx->quant_mat[i]        * q;
                    td->custom_chroma_q[i] = ctx->quant_chroma_mat[i] * q;
                }
                qmat        = td->custom_q;
                qmat_chroma = td->custom_chroma_q;
            }
            bits += estimate_slice_plane(ctx, &error, 0,
                                         src, linesize[0],
                                         mbs_per_slice,
                                         num_cblocks[0], plane_factor[0],
                                         qmat, td);/* estimate luma plane */
            for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
                bits += estimate_slice_plane(ctx, &error, i,
                                             src, linesize[i],
                                             mbs_per_slice,
                                             num_cblocks[i], plane_factor[i],
                                             qmat_chroma, td);
            }
            if (bits <= ctx->bits_per_mb * mbs_per_slice)
                break;
        }

        slice_bits[max_quant + 1]  = bits;
        slice_score[max_quant + 1] = error;
        overquant = q;
    }
    td->nodes[trellis_node + max_quant + 1].quant = overquant;

    /* Link every candidate to its best predecessor in the previous column. */
    bits_limit = mbs * ctx->bits_per_mb;
    for (pq = min_quant; pq < max_quant + 2; pq++) {
        prev = trellis_node - TRELLIS_WIDTH + pq;

        for (q = min_quant; q < max_quant + 2; q++) {
            cur = trellis_node + q;

            bits  = td->nodes[prev].bits + slice_bits[q];
            error = slice_score[q];
            if (bits > bits_limit)
                error = SCORE_LIMIT;

            if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
                new_score = td->nodes[prev].score + error;
            else
                new_score = SCORE_LIMIT;
            if (td->nodes[cur].prev_node == -1 ||
                td->nodes[cur].score >= new_score) {

                td->nodes[cur].bits      = bits;
                td->nodes[cur].score     = new_score;
                td->nodes[cur].prev_node = prev;
            }
        }
    }

    /* Pick the node with the lowest score; ties go to the later entry. */
    error = td->nodes[trellis_node + min_quant].score;
    pq    = trellis_node + min_quant;
    for (q = min_quant + 1; q < max_quant + 2; q++) {
        if (td->nodes[trellis_node + q].score <= error) {
            error = td->nodes[trellis_node + q].score;
            pq    = trellis_node + q;
        }
    }

    return pq;
}
973
/**
 * Choose the quantisers for one row of slices.
 *
 * Runs find_slice_quant() on every slice of macroblock row jobnr from left
 * to right, then walks the trellis backwards from the best final node and
 * stores each slice's quantiser in ctx->slice_q.
 *
 * @param arg      not used here
 * @param jobnr    macroblock row to process
 * @param threadnr selects the ProresThreadData to work with
 * @return 0
 */
static int find_quant_thread(AVCodecContext *avctx, void *arg,
                             int jobnr, int threadnr)
{
    ProresContext *ctx   = avctx->priv_data;
    ProresThreadData *td = &ctx->tdata[threadnr];
    const int row        = jobnr;
    int mbs_per_slice    = ctx->mbs_per_slice;
    int node             = 0;
    int x, slice;

    for (x = 0, slice = 0; x < ctx->mb_width; x += mbs_per_slice, slice++) {
        /* halve the slice width until it fits the macroblocks that remain */
        while (ctx->mb_width - x < mbs_per_slice)
            mbs_per_slice >>= 1;
        node = find_slice_quant(avctx,
                                (slice + 1) * TRELLIS_WIDTH, x, row,
                                mbs_per_slice, td);
    }

    /* follow the predecessor links from the last slice back to the first */
    for (slice = ctx->slices_width - 1; slice >= 0; slice--) {
        ctx->slice_q[slice + row * ctx->slices_width] = td->nodes[node].quant;
        node = td->nodes[node].prev_node;
    }

    return 0;
}
997
encode_frame(AVCodecContext * avctx,AVPacket * pkt,const AVFrame * pic,int * got_packet)998 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
999 const AVFrame *pic, int *got_packet)
1000 {
1001 ProresContext *ctx = avctx->priv_data;
1002 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
1003 uint8_t *picture_size_pos;
1004 PutBitContext pb;
1005 int x, y, i, mb, q = 0;
1006 int sizes[4] = { 0 };
1007 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
1008 int frame_size, picture_size, slice_size;
1009 int pkt_size, ret;
1010 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
1011 uint8_t frame_flags;
1012
1013 ctx->pic = pic;
1014 pkt_size = ctx->frame_size_upper_bound;
1015
1016 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
1017 return ret;
1018
1019 orig_buf = pkt->data;
1020
1021 // frame atom
1022 orig_buf += 4; // frame size
1023 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
1024 buf = orig_buf;
1025
1026 // frame header
1027 tmp = buf;
1028 buf += 2; // frame header size will be stored here
1029 bytestream_put_be16 (&buf, 0); // version 1
1030 bytestream_put_buffer(&buf, ctx->vendor, 4);
1031 bytestream_put_be16 (&buf, avctx->width);
1032 bytestream_put_be16 (&buf, avctx->height);
1033
1034 frame_flags = ctx->chroma_factor << 6;
1035 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1036 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
1037 bytestream_put_byte (&buf, frame_flags);
1038
1039 bytestream_put_byte (&buf, 0); // reserved
1040 bytestream_put_byte (&buf, pic->color_primaries);
1041 bytestream_put_byte (&buf, pic->color_trc);
1042 bytestream_put_byte (&buf, pic->colorspace);
1043 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
1044 bytestream_put_byte (&buf, 0); // reserved
1045 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1046 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
1047 // luma quantisation matrix
1048 for (i = 0; i < 64; i++)
1049 bytestream_put_byte(&buf, ctx->quant_mat[i]);
1050 // chroma quantisation matrix
1051 for (i = 0; i < 64; i++)
1052 bytestream_put_byte(&buf, ctx->quant_mat[i]);
1053 } else {
1054 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
1055 }
1056 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
1057
1058 for (ctx->cur_picture_idx = 0;
1059 ctx->cur_picture_idx < ctx->pictures_per_frame;
1060 ctx->cur_picture_idx++) {
1061 // picture header
1062 picture_size_pos = buf + 1;
1063 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
1064 buf += 4; // picture data size will be stored here
1065 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1066 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1067
1068 // seek table - will be filled during slice encoding
1069 slice_sizes = buf;
1070 buf += ctx->slices_per_picture * 2;
1071
1072 // slices
1073 if (!ctx->force_quant) {
1074 ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1075 ctx->mb_height);
1076 if (ret)
1077 return ret;
1078 }
1079
1080 for (y = 0; y < ctx->mb_height; y++) {
1081 int mbs_per_slice = ctx->mbs_per_slice;
1082 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1083 q = ctx->force_quant ? ctx->force_quant
1084 : ctx->slice_q[mb + y * ctx->slices_width];
1085
1086 while (ctx->mb_width - x < mbs_per_slice)
1087 mbs_per_slice >>= 1;
1088
1089 bytestream_put_byte(&buf, slice_hdr_size << 3);
1090 slice_hdr = buf;
1091 buf += slice_hdr_size - 1;
1092 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1093 uint8_t *start = pkt->data;
1094 // Recompute new size according to max_slice_size
1095 // and deduce delta
1096 int delta = 200 + (ctx->pictures_per_frame *
1097 ctx->slices_per_picture + 1) *
1098 max_slice_size - pkt_size;
1099
1100 delta = FFMAX(delta, 2 * max_slice_size);
1101 ctx->frame_size_upper_bound += delta;
1102
1103 if (!ctx->warn) {
1104 avpriv_request_sample(avctx,
1105 "Packet too small: is %i,"
1106 " needs %i (slice: %i). "
1107 "Correct allocation",
1108 pkt_size, delta, max_slice_size);
1109 ctx->warn = 1;
1110 }
1111
1112 ret = av_grow_packet(pkt, delta);
1113 if (ret < 0)
1114 return ret;
1115
1116 pkt_size += delta;
1117 // restore pointers
1118 orig_buf = pkt->data + (orig_buf - start);
1119 buf = pkt->data + (buf - start);
1120 picture_size_pos = pkt->data + (picture_size_pos - start);
1121 slice_sizes = pkt->data + (slice_sizes - start);
1122 slice_hdr = pkt->data + (slice_hdr - start);
1123 tmp = pkt->data + (tmp - start);
1124 }
1125 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1126 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1127 mbs_per_slice);
1128 if (ret < 0)
1129 return ret;
1130
1131 bytestream_put_byte(&slice_hdr, q);
1132 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1133 for (i = 0; i < ctx->num_planes - 1; i++) {
1134 bytestream_put_be16(&slice_hdr, sizes[i]);
1135 slice_size += sizes[i];
1136 }
1137 bytestream_put_be16(&slice_sizes, slice_size);
1138 buf += slice_size - slice_hdr_size;
1139 if (max_slice_size < slice_size)
1140 max_slice_size = slice_size;
1141 }
1142 }
1143
1144 picture_size = buf - (picture_size_pos - 1);
1145 bytestream_put_be32(&picture_size_pos, picture_size);
1146 }
1147
1148 orig_buf -= 8;
1149 frame_size = buf - orig_buf;
1150 bytestream_put_be32(&orig_buf, frame_size);
1151
1152 pkt->size = frame_size;
1153 pkt->flags |= AV_PKT_FLAG_KEY;
1154 *got_packet = 1;
1155
1156 return 0;
1157 }
1158
encode_close(AVCodecContext * avctx)1159 static av_cold int encode_close(AVCodecContext *avctx)
1160 {
1161 ProresContext *ctx = avctx->priv_data;
1162 int i;
1163
1164 if (ctx->tdata) {
1165 for (i = 0; i < avctx->thread_count; i++)
1166 av_freep(&ctx->tdata[i].nodes);
1167 }
1168 av_freep(&ctx->tdata);
1169 av_freep(&ctx->slice_q);
1170
1171 return 0;
1172 }
1173
/**
 * Load an 8x8 block of 16-bit samples and run the forward DCT on it.
 *
 * @param fdsp     DSP context providing the fdct() implementation
 * @param src      top-left sample of the 8x8 source area
 * @param linesize distance between source rows; it is halved before being
 *                 applied to the uint16_t pointer
 * @param block    receives the 64 samples in row-major order and is then
 *                 transformed in place by fdsp->fdct()
 */
static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
                        ptrdiff_t linesize, int16_t *block)
{
    const ptrdiff_t stride = linesize >> 1;
    int row, col;

    for (row = 0; row < 8; row++) {
        const uint16_t *line = src + row * stride;
        int16_t *dst         = block + row * 8;

        for (col = 0; col < 8; col++)
            dst[col] = line[col];
    }

    fdsp->fdct(block);
}
1187
encode_init(AVCodecContext * avctx)1188 static av_cold int encode_init(AVCodecContext *avctx)
1189 {
1190 ProresContext *ctx = avctx->priv_data;
1191 int mps;
1192 int i, j;
1193 int min_quant, max_quant;
1194 int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1195
1196 avctx->bits_per_raw_sample = 10;
1197 #if FF_API_CODED_FRAME
1198 FF_DISABLE_DEPRECATION_WARNINGS
1199 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
1200 avctx->coded_frame->key_frame = 1;
1201 FF_ENABLE_DEPRECATION_WARNINGS
1202 #endif
1203
1204 ctx->fdct = prores_fdct;
1205 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1206 : ff_prores_progressive_scan;
1207 ff_fdctdsp_init(&ctx->fdsp, avctx);
1208
1209 mps = ctx->mbs_per_slice;
1210 if (mps & (mps - 1)) {
1211 av_log(avctx, AV_LOG_ERROR,
1212 "there should be an integer power of two MBs per slice\n");
1213 return AVERROR(EINVAL);
1214 }
1215 if (ctx->profile == PRORES_PROFILE_AUTO) {
1216 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1217 ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1218 !(desc->log2_chroma_w + desc->log2_chroma_h))
1219 ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1220 av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1221 "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1222 ? "4:4:4:4 profile because of the used input colorspace"
1223 : "HQ profile to keep best quality");
1224 }
1225 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1226 if (ctx->profile != PRORES_PROFILE_4444 &&
1227 ctx->profile != PRORES_PROFILE_4444XQ) {
1228 // force alpha and warn
1229 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1230 "encode alpha. Override with -profile if needed.\n");
1231 ctx->alpha_bits = 0;
1232 }
1233 if (ctx->alpha_bits & 7) {
1234 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1235 return AVERROR(EINVAL);
1236 }
1237 avctx->bits_per_coded_sample = 32;
1238 } else {
1239 ctx->alpha_bits = 0;
1240 }
1241
1242 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1243 ? CFACTOR_Y422
1244 : CFACTOR_Y444;
1245 ctx->profile_info = prores_profile_info + ctx->profile;
1246 ctx->num_planes = 3 + !!ctx->alpha_bits;
1247
1248 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1249
1250 if (interlaced)
1251 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1252 else
1253 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1254
1255 ctx->slices_width = ctx->mb_width / mps;
1256 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1257 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1258 ctx->pictures_per_frame = 1 + interlaced;
1259
1260 if (ctx->quant_sel == -1) {
1261 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1262 ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
1263 } else {
1264 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1265 ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
1266 }
1267
1268 if (strlen(ctx->vendor) != 4) {
1269 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1270 return AVERROR_INVALIDDATA;
1271 }
1272
1273 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1274 if (!ctx->force_quant) {
1275 if (!ctx->bits_per_mb) {
1276 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1277 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1278 ctx->pictures_per_frame)
1279 break;
1280 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1281 if (ctx->alpha_bits)
1282 ctx->bits_per_mb *= 20;
1283 } else if (ctx->bits_per_mb < 128) {
1284 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1285 return AVERROR_INVALIDDATA;
1286 }
1287
1288 min_quant = ctx->profile_info->min_quant;
1289 max_quant = ctx->profile_info->max_quant;
1290 for (i = min_quant; i < MAX_STORED_Q; i++) {
1291 for (j = 0; j < 64; j++) {
1292 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1293 ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
1294 }
1295 }
1296
1297 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1298 if (!ctx->slice_q) {
1299 encode_close(avctx);
1300 return AVERROR(ENOMEM);
1301 }
1302
1303 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1304 if (!ctx->tdata) {
1305 encode_close(avctx);
1306 return AVERROR(ENOMEM);
1307 }
1308
1309 for (j = 0; j < avctx->thread_count; j++) {
1310 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1311 * TRELLIS_WIDTH
1312 * sizeof(*ctx->tdata->nodes));
1313 if (!ctx->tdata[j].nodes) {
1314 encode_close(avctx);
1315 return AVERROR(ENOMEM);
1316 }
1317 for (i = min_quant; i < max_quant + 2; i++) {
1318 ctx->tdata[j].nodes[i].prev_node = -1;
1319 ctx->tdata[j].nodes[i].bits = 0;
1320 ctx->tdata[j].nodes[i].score = 0;
1321 }
1322 }
1323 } else {
1324 int ls = 0;
1325 int ls_chroma = 0;
1326
1327 if (ctx->force_quant > 64) {
1328 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1329 return AVERROR_INVALIDDATA;
1330 }
1331
1332 for (j = 0; j < 64; j++) {
1333 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1334 ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
1335 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1336 ls_chroma += av_log2((1 << 11) / ctx->quants_chroma[0][j]) * 2 + 1;
1337 }
1338
1339 ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
1340 if (ctx->chroma_factor == CFACTOR_Y444)
1341 ctx->bits_per_mb += ls_chroma * 4;
1342 }
1343
1344 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1345 ctx->slices_per_picture + 1) *
1346 (2 + 2 * ctx->num_planes +
1347 (mps * ctx->bits_per_mb) / 8)
1348 + 200;
1349
1350 if (ctx->alpha_bits) {
1351 // The alpha plane is run-coded and might exceed the bit budget.
1352 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1353 ctx->slices_per_picture + 1) *
1354 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1355 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1356 }
1357
1358 avctx->codec_tag = ctx->profile_info->tag;
1359
1360 av_log(avctx, AV_LOG_DEBUG,
1361 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1362 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1363 interlaced ? "yes" : "no", ctx->bits_per_mb);
1364 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1365 ctx->frame_size_upper_bound);
1366
1367 return 0;
1368 }
1369
1370 #define OFFSET(x) offsetof(ProresContext, x)
1371 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1372
1373 static const AVOption options[] = {
1374 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1375 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1376 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1377 { .i64 = PRORES_PROFILE_AUTO },
1378 PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1379 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1380 0, 0, VE, "profile" },
1381 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1382 0, 0, VE, "profile" },
1383 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1384 0, 0, VE, "profile" },
1385 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1386 0, 0, VE, "profile" },
1387 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1388 0, 0, VE, "profile" },
1389 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1390 0, 0, VE, "profile" },
1391 { "4444xq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1392 0, 0, VE, "profile" },
1393 { "vendor", "vendor ID", OFFSET(vendor),
1394 AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
1395 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1396 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1397 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1398 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1399 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1400 0, 0, VE, "quant_mat" },
1401 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1402 0, 0, VE, "quant_mat" },
1403 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1404 0, 0, VE, "quant_mat" },
1405 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1406 0, 0, VE, "quant_mat" },
1407 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1408 0, 0, VE, "quant_mat" },
1409 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1410 0, 0, VE, "quant_mat" },
1411 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1412 { .i64 = 16 }, 0, 16, VE },
1413 { NULL }
1414 };
1415
1416 static const AVClass proresenc_class = {
1417 .class_name = "ProRes encoder",
1418 .item_name = av_default_item_name,
1419 .option = options,
1420 .version = LIBAVUTIL_VERSION_INT,
1421 };
1422
1423 AVCodec ff_prores_ks_encoder = {
1424 .name = "prores_ks",
1425 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1426 .type = AVMEDIA_TYPE_VIDEO,
1427 .id = AV_CODEC_ID_PRORES,
1428 .priv_data_size = sizeof(ProresContext),
1429 .init = encode_init,
1430 .close = encode_close,
1431 .encode2 = encode_frame,
1432 .capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
1433 .pix_fmts = (const enum AVPixelFormat[]) {
1434 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1435 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1436 },
1437 .priv_class = &proresenc_class,
1438 .profiles = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1439 };
1440