1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "me_cmp.h"
26 #include "snow_dwt.h"
27 #include "internal.h"
28 #include "snow.h"
29 #include "snowdata.h"
30
31 #include "rangecoder.h"
32 #include "mathops.h"
33 #include "h263.h"
34
35
/**
 * Blend four predictions with the OBMC window and apply the result to the
 * slice buffer.
 *
 * For every pixel the four block[] predictions are weighted with the four
 * quadrants of the obmc window and summed. With add == 0 the weighted sum is
 * subtracted from the slice-buffer line; otherwise it is added to the
 * slice-buffer sample, rounded down to pixel precision, clipped to 0..255
 * and written to dst8.
 *
 * @param obmc        OBMC weight window
 * @param obmc_stride stride of the window (also used to locate its quadrants)
 * @param block       the four predictions, indexed 0..3
 * @param b_w, b_h    size of the area to process
 * @param src_x       column offset inside the slice-buffer line
 * @param src_y       first slice-buffer line to process
 * @param src_stride  stride of block[] and dst8
 * @param sb          slice buffer providing the IDWT lines
 * @param add         non-zero: reconstruct into dst8, zero: subtract prediction
 * @param dst8        8-bit output, only written when add is non-zero
 */
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
    const int half_stride = obmc_stride >> 1;
    int row, col;

    for (row = 0; row < b_h; row++) {
        //FIXME ugly misuse of obmc_stride
        const uint8_t *win_a   = obmc  + row * obmc_stride;
        const uint8_t *win_b   = win_a + half_stride;
        const uint8_t *win_c   = win_a + obmc_stride * half_stride;
        const uint8_t *win_d   = win_c + half_stride;
        const int      pix_off = row * src_stride;
        IDWTELEM      *line    = slice_buffer_get_line(sb, src_y + row);

        for (col = 0; col < b_w; col++) {
            const int idx = col + pix_off;
            int acc = win_a[col] * block[3][idx]
                    + win_b[col] * block[2][idx]
                    + win_c[col] * block[1][idx]
                    + win_d[col] * block[0][idx];

            /* bring the weighted sum to FRAC_BITS fractional bits */
            acc <<= 8 - LOG2_OBMC_MAX;
            if (FRAC_BITS != 8)
                acc >>= 8 - FRAC_BITS;

            if (!add) {
                line[col + src_x] -= acc;
                continue;
            }

            acc += line[col + src_x];
            acc  = (acc + (1 << (FRAC_BITS - 1))) >> FRAC_BITS;
            if (acc & ~255)
                acc = ~(acc >> 31);     /* negative -> 0, too large -> 255 after the 8-bit store */
            dst8[idx] = acc;
        }
    }
}
68
/**
 * Obtain a reference-counted buffer for frame.
 *
 * When the codec is an encoder the buffer is requested 2*EDGE_WIDTH larger
 * in both directions, the plane pointers are then moved past the top/left
 * border (scaled by the chroma shifts for planes other than the first) and
 * the frame dimensions are set back to the coded size.
 *
 * @return 0 on success, the negative error code of ff_get_buffer() otherwise
 */
int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
{
    const int with_edges = av_codec_is_encoder(s->avctx->codec);
    int plane, err;

    frame->width  = s->avctx->width;
    frame->height = s->avctx->height;
    if (with_edges) {
        frame->width  += 2 * EDGE_WIDTH;
        frame->height += 2 * EDGE_WIDTH;
    }

    err = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF);
    if (err < 0)
        return err;

    if (!with_edges)
        return 0;

    /* skip the border so that data[] points at the first visible sample */
    for (plane = 0; frame->data[plane]; plane++) {
        const int v_shift = plane ? s->chroma_v_shift : 0;
        const int h_shift = plane ? s->chroma_h_shift : 0;
        int skip = (EDGE_WIDTH >> v_shift) * frame->linesize[plane] +
                   (EDGE_WIDTH >> h_shift);

        frame->data[plane] += skip;
    }
    frame->width  = s->avctx->width;
    frame->height = s->avctx->height;

    return 0;
}
95
/**
 * Reset all range-coder context states (per-subband, header and block
 * states) to MID_STATE.
 */
void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
    int p, lvl, orient;

    for (p = 0; p < 3; p++) {
        for (lvl = 0; lvl < MAX_DECOMPOSITIONS; lvl++) {
            /* orientation 0 is only visited at level 0 */
            for (orient = lvl ? 1 : 0; orient < 4; orient++) {
                SubBand *band = &s->plane[p].band[lvl][orient];

                memset(band->state, MID_STATE, sizeof(band->state));
            }
        }
    }

    memset(s->header_state, MID_STATE, sizeof(s->header_state));
    memset(s->block_state,  MID_STATE, sizeof(s->block_state));
}
109
/**
 * (Re)allocate the zero-initialised block array for the current picture size.
 *
 * The picture dimensions are rounded up to whole macroblocks
 * (LOG2_MB_SIZE) and stored in b_width / b_height; each macroblock gets
 * sizeof(BlockNode) << (2 * block_max_depth) bytes.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the allocation failed
 */
int ff_snow_alloc_blocks(SnowContext *s){
    const int blocks_w = AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
    const int blocks_h = AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);

    s->b_width  = blocks_w;
    s->b_height = blocks_h;

    av_free(s->block);
    s->block = av_mallocz_array(blocks_w * blocks_h,
                                sizeof(BlockNode) << (s->block_max_depth*2));

    return s->block ? 0 : AVERROR(ENOMEM);
}
124
init_qexp(void)125 static av_cold void init_qexp(void){
126 int i;
127 double v=128;
128
129 for(i=0; i<QROOT; i++){
130 ff_qexp[i]= lrintf(v);
131 v *= pow(2, 1.0 / QROOT);
132 }
133 }
/**
 * Sub-pel motion compensation of one b_w x b_h block.
 *
 * Up to three half-pel planes are interpolated from src into on-stack
 * temporaries, then the output is produced either as a two-tap blend of two
 * planes or (b == 15) as a bilinear blend of four planes.
 *
 * @param p      plane parameters (diag_mc, fast_mc, hcoeff); may be NULL, in
 *               which case the fixed 20/-5/1 filter is used
 * @param dst    destination, written with line size stride
 * @param src    source; the filters read HTAPS_MAX taps starting at src, i.e.
 *               the caller passes a pointer already moved up/left
 * @param stride line size of both src and dst
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal sub-pel position, must be < 16
 * @param dy     vertical sub-pel position, must be < 16
 */
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
    /* weight (out of 8) of the first source in the two-tap blend,
     * indexed by (dx&7) + 8*(dy&7) */
    static const uint8_t weight[64]={
    8,7,6,5,4,3,2,1,
    7,7,0,0,0,0,0,1,
    6,0,6,0,0,0,2,0,
    5,0,0,5,0,3,0,0,
    4,0,0,0,4,0,0,0,
    3,0,0,5,0,3,0,0,
    2,0,6,0,0,0,2,0,
    1,7,0,0,0,0,0,1,
    };

    /* indexed by dx + 16*dy: high nibble = hpel[] index of the first source (l),
     * low nibble = hpel[] index of the second source (r); nibble 0xc selects
     * needs[12] == 15 and therefore the four-plane bilinear path */
    static const uint8_t brane[256]={
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
    };

    /* indexed by hpel[] index: bit mask of the interpolation passes that have
     * to run before that plane can be read (tested below as b&5, b&2, b&4);
     * a non-zero entry also means the plane lives in a temporary with line
     * size 64 (see MC_STRIDE) */
    static const uint8_t needs[16]={
    0,1,0,0,
    2,4,2,0,
    0,1,0,0,
    15
    };

    int x, y, b, r, l;
    int16_t tmpIt   [64*(32+HTAPS_MAX)];    /* unrounded horizontal filter output, 64 entries per row */
    uint8_t tmp2t[3][64*(32+HTAPS_MAX)];    /* the three interpolated 8-bit planes, 64 bytes per row */
    int16_t *tmpI= tmpIt;
    uint8_t *tmp2= tmp2t[0];
    const uint8_t *hpel[11];
    av_assert2(dx<16 && dy<16);
    r= brane[dx + 16*dy]&15;
    l= brane[dx + 16*dy]>>4;

    b= needs[l] | needs[r];
    if(p && !p->diag_mc)
        b= 15;                              /* always take the four-plane bilinear path */

    /* pass 1: horizontal filter over b_h+HTAPS_MAX-1 rows; the unrounded sum
     * goes to tmpI (input of pass 3), the rounded and clipped value to tmp2t[0] */
    if(b&5){
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
            for(x=0; x < b_w; x++){
                int a_1=src[x + HTAPS_MAX/2-4];
                int a0= src[x + HTAPS_MAX/2-3];
                int a1= src[x + HTAPS_MAX/2-2];
                int a2= src[x + HTAPS_MAX/2-1];
                int a3= src[x + HTAPS_MAX/2+0];
                int a4= src[x + HTAPS_MAX/2+1];
                int a5= src[x + HTAPS_MAX/2+2];
                int a6= src[x + HTAPS_MAX/2+3];
                int am=0;
                if(!p || p->fast_mc){
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
                    tmpI[x]= am;
                    am= (am+16)>>5;
                }else{
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
                    tmpI[x]= am;
                    am= (am+32)>>6;
                }

                /* out of 0..255: negative becomes 0, too large becomes 255 after the 8-bit store */
                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            tmpI+= 64;
            tmp2+= 64;
            src += stride;
        }
        src -= stride*y;                    /* rewind to the first row */
    }
    src += HTAPS_MAX/2 - 1;                 /* step over the left filter margin */
    tmp2= tmp2t[1];

    /* pass 2: vertical filter on the source pixels, b_w+1 columns, into tmp2t[1] */
    if(b&2){
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w+1; x++){
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
                int am=0;
                if(!p || p->fast_mc)
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
                else
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;

                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            src += stride;
            tmp2+= 64;
        }
        src -= stride*y;                    /* rewind to the first row */
    }
    src += stride*(HTAPS_MAX/2 - 1);        /* step over the top filter margin */
    tmp2= tmp2t[2];
    tmpI= tmpIt;
    /* pass 3: vertical filter over the unrounded pass-1 output into tmp2t[2];
     * the larger rounding/shift accounts for both filter stages */
    if(b&4){
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
                int am=0;
                if(!p || p->fast_mc)
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
                else
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            tmpI+= 64;
            tmp2+= 64;
        }
    }

    /* 3x3 grid of candidate planes with a row pitch of 4 entries;
     * entries 3 and 7 are never assigned */
    hpel[ 0]= src;
    hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);   /* skip the top margin rows of pass 1 */
    hpel[ 2]= src + 1;

    hpel[ 4]= tmp2t[1];
    hpel[ 5]= tmp2t[2];
    hpel[ 6]= tmp2t[1] + 1;

    hpel[ 8]= src + stride;
    hpel[ 9]= hpel[1] + 64;
    hpel[10]= hpel[8] + 1;

/* line size of plane x: 64 for the temporaries, stride for source pointers */
#define MC_STRIDE(x) (needs[x] ? 64 : stride)

    if(b==15){
        /* bilinear blend of the four planes surrounding the position */
        int dxy = dx / 8 + dy / 8 * 4;
        const uint8_t *src1 = hpel[dxy    ];
        const uint8_t *src2 = hpel[dxy + 1];
        const uint8_t *src3 = hpel[dxy + 4];
        const uint8_t *src4 = hpel[dxy + 5];
        int stride1 = MC_STRIDE(dxy);
        int stride2 = MC_STRIDE(dxy + 1);
        int stride3 = MC_STRIDE(dxy + 4);
        int stride4 = MC_STRIDE(dxy + 5);
        dx&=7;
        dy&=7;
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
            }
            src1+=stride1;
            src2+=stride2;
            src3+=stride3;
            src4+=stride4;
            dst +=stride;
        }
    }else{
        /* two-tap blend of planes l and r with weights a and 8-a */
        const uint8_t *src1= hpel[l];
        const uint8_t *src2= hpel[r];
        int stride1 = MC_STRIDE(l);
        int stride2 = MC_STRIDE(r);
        int a= weight[((dx&7) + (8*(dy&7)))];
        int b= 8-a;                         /* note: shadows the pass mask b of the outer scope */
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
            }
            src1+=stride1;
            src2+=stride2;
            dst +=stride;
        }
    }
}
326
/**
 * Produce the prediction for one block into dst.
 *
 * Intra blocks are filled with the block's colour for the given plane.
 * Inter blocks are motion compensated from s->last_picture[block->ref]:
 * the source is edge-emulated into tmp when the filter footprint leaves the
 * picture, and the interpolation is done either by mc_block() or, for
 * quarter-pel aligned vectors on supported block shapes with fast_mc set,
 * by the h264 qpel functions.
 *
 * @param s           codec context
 * @param dst         destination of the prediction
 * @param tmp         scratch memory used for edge emulation
 * @param stride      line size of dst and of the reference plane
 * @param sx, sy      position of the block in the plane
 * @param b_w, b_h    block size
 * @param block       block descriptor (type, colour, reference, motion vector)
 * @param plane_index plane being predicted
 * @param w, h        dimensions of the plane
 */
void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
    if(block->type & BLOCK_INTRA){
        int y;
        const unsigned color = block->color[plane_index];

        /* Flat fill. The former width-specialised variants stored
         * color*0x01010101 through uint32_t pointers cast from uint8_t*,
         * which violates the effective-type rule and may be misaligned;
         * a byte-wise memset writes exactly the same bytes.
         * NOTE(review): relies on block->color[] holding 8-bit values, as the
         * replicated-byte stores already did - confirm against BlockNode. */
        if(b_w > 0){
            for(y=0; y < b_h; y++)
                memset(dst + y*stride, color, b_w);
        }
    }else{
        uint8_t *src= s->last_picture[block->ref]->data[plane_index];
        const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
        int mx= block->mx*scale;
        int my= block->my*scale;
        const int dx= mx&15;                    /* sub-pel part of the vector */
        const int dy= my&15;
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
        /* integer part of the vector, moved up/left by the filter margin */
        sx += (mx>>4) - (HTAPS_MAX/2-1);
        sy += (my>>4) - (HTAPS_MAX/2-1);
        src += sx + sy*stride;
        /* the unsigned compare also catches negative positions */
        if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
           || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
            s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
                                     stride, stride,
                                     b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
                                     sx, sy, w, h);
            src= tmp + MB_SIZE;
        }

        av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale

        av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
        /* generic path: non quarter-pel vector, unsupported shape or size,
         * or fast_mc disabled for this plane */
        if(    (dx&3) || (dy&3)
            || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
            || (b_w&(b_w-1))
            || b_w == 1
            || b_h == 1
            || !s->plane[plane_index].fast_mc )
            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
        else if(b_w==32){
            /* 32 wide: two 16x16 calls per 16 rows */
            int y;
            for(y=0; y<b_h; y+=16){
                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
            }
        }else if(b_w==b_h)
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
        else if(b_w==2*b_h){
            /* wide block: two square halves side by side */
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
        }else{
            /* tall block: two square halves on top of each other */
            av_assert2(2*b_w==b_h);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
        }
    }
}
414
415 #define mca(dx,dy,b_w)\
416 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
417 av_assert2(h==b_w);\
418 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
419 }
420
421 mca( 0, 0,16)
422 mca( 8, 0,16)
423 mca( 0, 8,16)
424 mca( 8, 8,16)
425 mca( 0, 0,8)
426 mca( 8, 0,8)
427 mca( 0, 8,8)
428 mca( 8, 8,8)
429
ff_snow_common_init(AVCodecContext * avctx)430 av_cold int ff_snow_common_init(AVCodecContext *avctx){
431 SnowContext *s = avctx->priv_data;
432 int width, height;
433 int i, j;
434
435 s->avctx= avctx;
436 s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
437 s->spatial_decomposition_count = 1;
438
439 ff_me_cmp_init(&s->mecc, avctx);
440 ff_hpeldsp_init(&s->hdsp, avctx->flags);
441 ff_videodsp_init(&s->vdsp, 8);
442 ff_dwt_init(&s->dwt);
443 ff_h264qpel_init(&s->h264qpel, 8);
444
445 #define mcf(dx,dy)\
446 s->qdsp.put_qpel_pixels_tab [0][dy+dx/4]=\
447 s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
448 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
449 s->qdsp.put_qpel_pixels_tab [1][dy+dx/4]=\
450 s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
451 s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
452
453 mcf( 0, 0)
454 mcf( 4, 0)
455 mcf( 8, 0)
456 mcf(12, 0)
457 mcf( 0, 4)
458 mcf( 4, 4)
459 mcf( 8, 4)
460 mcf(12, 4)
461 mcf( 0, 8)
462 mcf( 4, 8)
463 mcf( 8, 8)
464 mcf(12, 8)
465 mcf( 0,12)
466 mcf( 4,12)
467 mcf( 8,12)
468 mcf(12,12)
469
470 #define mcfh(dx,dy)\
471 s->hdsp.put_pixels_tab [0][dy/4+dx/8]=\
472 s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
473 mc_block_hpel ## dx ## dy ## 16;\
474 s->hdsp.put_pixels_tab [1][dy/4+dx/8]=\
475 s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
476 mc_block_hpel ## dx ## dy ## 8;
477
478 mcfh(0, 0)
479 mcfh(8, 0)
480 mcfh(0, 8)
481 mcfh(8, 8)
482
483 init_qexp();
484
485 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
486
487 width= s->avctx->width;
488 height= s->avctx->height;
489
490 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
491 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer, width, height * sizeof(DWTELEM), fail); //FIXME this does not belong here
492 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer, width, sizeof(DWTELEM), fail);
493 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer, width, sizeof(IDWTELEM), fail);
494 FF_ALLOC_ARRAY_OR_GOTO(avctx, s->run_buffer, ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
495
496 for(i=0; i<MAX_REF_FRAMES; i++) {
497 for(j=0; j<MAX_REF_FRAMES; j++)
498 ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
499 s->last_picture[i] = av_frame_alloc();
500 if (!s->last_picture[i])
501 goto fail;
502 }
503
504 s->mconly_picture = av_frame_alloc();
505 s->current_picture = av_frame_alloc();
506 if (!s->mconly_picture || !s->current_picture)
507 goto fail;
508
509 return 0;
510 fail:
511 return AVERROR(ENOMEM);
512 }
513
/**
 * Second-stage initialisation that depends on values known only after the
 * header (plane count, chroma shifts, decomposition count).
 *
 * On the first call (scratchbuf not yet allocated) a buffer is obtained for
 * mconly_picture and the scratch and edge-emulation buffers are allocated.
 * Then, for every plane, the plane size and the geometry of every subband
 * are set up and the per-subband x_coeff arrays are reallocated.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if the pixel format differs from
 *         the one of mconly_picture, AVERROR(ENOMEM) on allocation failure,
 *         or the error returned by ff_get_buffer()
 */
int ff_snow_common_init_after_header(AVCodecContext *avctx) {
    SnowContext *s = avctx->priv_data;
    int plane_index, level, orientation;
    int ret, emu_buf_size;

    if (!s->scratchbuf) {
        int row_size;

        ret = ff_get_buffer(s->avctx, s->mconly_picture, AV_GET_BUFFER_FLAG_REF);
        if (ret < 0)
            return ret;

        row_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256);
        FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, row_size, 7*MB_SIZE, fail);
        emu_buf_size = row_size * (2 * MB_SIZE + HTAPS_MAX - 1);
        FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
    }

    if (s->mconly_picture->format != avctx->pix_fmt) {
        av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
        return AVERROR_INVALIDDATA;
    }

    for (plane_index = 0; plane_index < s->nb_planes; plane_index++) {
        Plane *pl = &s->plane[plane_index];
        int w = s->avctx->width;
        int h = s->avctx->height;

        if (plane_index) {
            w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
            h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
        }
        pl->width  = w;
        pl->height = h;

        /* walk from the last level down to level 0, halving w/h each step */
        for (level = s->spatial_decomposition_count - 1; level >= 0; level--) {
            const int shift = s->spatial_decomposition_count - level;

            /* orientation 0 is only set up at level 0 */
            for (orientation = level ? 1 : 0; orientation < 4; orientation++) {
                SubBand *b = &pl->band[level][orientation];
                const int odd_col = orientation & 1;   /* band starts in the right half */
                const int odd_row = orientation > 1;   /* band starts half a line pitch down */

                b->buf    = s->spatial_dwt_buffer;
                b->level  = level;
                b->stride = pl->width << shift;
                b->width  = (w + !odd_col) >> 1;
                b->height = (h + !odd_row) >> 1;

                b->stride_line  = 1 << shift;
                b->buf_x_offset = 0;
                b->buf_y_offset = 0;

                if (odd_col) {
                    b->buf         += (w+1)>>1;
                    b->buf_x_offset = (w+1)>>1;
                }
                if (odd_row) {
                    b->buf         += b->stride>>1;
                    b->buf_y_offset = b->stride_line >> 1;
                }
                /* same element offset, but inside the IDWT buffer */
                b->ibuf = s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);

                if (level)
                    b->parent = &pl->band[level-1][orientation];

                //FIXME avoid this realloc
                av_freep(&b->x_coeff);
                b->x_coeff = av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
                if (!b->x_coeff)
                    goto fail;
            }
            w = (w+1)>>1;
            h = (h+1)>>1;
        }
    }

    return 0;
fail:
    return AVERROR(ENOMEM);
}
585
586 #define USE_HALFPEL_PLANE 0
587
/**
 * Build the half-pel interpolated planes of frame.
 *
 * For every plane, halfpel[0] is set to the frame data itself and
 * halfpel[1..3] are freshly allocated planes (with an EDGE_WIDTH border,
 * pointers moved past it) holding the horizontally filtered, vertically
 * filtered, and horizontally-then-vertically filtered picture. The 6-tap
 * filter result is stored without clipping.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation failed (the three
 *         planes of the failing component are freed; earlier ones are kept)
 */
static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
    int plane, row, col, k;

    for (plane = 0; plane < s->nb_planes; plane++) {
        const int chroma = !!plane;
        const int w  = chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
        const int h  = chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
        const int ls = frame->linesize[plane];
        uint8_t *pix = frame->data[plane];

        for (k = 1; k <= 3; k++)
            halfpel[k][plane] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
        if (!halfpel[1][plane] || !halfpel[2][plane] || !halfpel[3][plane]) {
            for (k = 1; k <= 3; k++)
                av_freep(&halfpel[k][plane]);
            return AVERROR(ENOMEM);
        }
        /* point at the first sample inside the border */
        for (k = 1; k <= 3; k++)
            halfpel[k][plane] += EDGE_WIDTH * (1 + ls);

        halfpel[0][plane] = pix;

        /* horizontal half-pel positions */
        for (row = 0; row < h; row++) {
            for (col = 0; col < w; col++) {
                const int pos = row*ls + col;

                halfpel[1][plane][pos] = (20*(pix[pos] + pix[pos+1]) - 5*(pix[pos-1] + pix[pos+2]) + (pix[pos-2] + pix[pos+3]) + 16) >> 5;
            }
        }

        /* vertical half-pel positions */
        for (row = 0; row < h; row++) {
            for (col = 0; col < w; col++) {
                const int pos = row*ls + col;

                halfpel[2][plane][pos] = (20*(pix[pos] + pix[pos+ls]) - 5*(pix[pos-ls] + pix[pos+2*ls]) + (pix[pos-2*ls] + pix[pos+3*ls]) + 16) >> 5;
            }
        }

        /* diagonal positions: vertical filter applied to the horizontal plane */
        pix = halfpel[1][plane];
        for (row = 0; row < h; row++) {
            for (col = 0; col < w; col++) {
                const int pos = row*ls + col;

                halfpel[3][plane][pos] = (20*(pix[pos] + pix[pos+ls]) - 5*(pix[pos-ls] + pix[pos+2*ls]) + (pix[pos-2*ls] + pix[pos+3*ls]) + 16) >> 5;
            }
        }

//FIXME border!
    }
    return 0;
}
639
/**
 * Release the oldest reference picture (index max_ref_frames-1) together
 * with its allocated half-pel planes, if that picture holds data.
 */
void ff_snow_release_buffer(AVCodecContext *avctx)
{
    SnowContext *s = avctx->priv_data;
    const int oldest = s->max_ref_frames - 1;
    int kind, comp;

    if (!s->last_picture[oldest]->data[0])
        return;

    av_frame_unref(s->last_picture[oldest]);

    /* planes 1..3 of components 0..2 are owned allocations; plane 0 is not freed here */
    for (kind = 1; kind <= 3; kind++) {
        for (comp = 0; comp < 3; comp++) {
            uint8_t **slot = &s->halfpel_plane[oldest][kind][comp];

            if (!*slot)
                continue;
            /* step back over the border to the address that was allocated */
            av_free(*slot - EDGE_WIDTH*(1+s->current_picture->linesize[comp]));
            *slot = NULL;
        }
    }
}
654
/**
 * Prepare the context for coding a new frame.
 *
 * Releases the oldest reference, shifts the reference list by one so that
 * the previous current picture becomes reference 0, recycles the released
 * frame as the new current picture, determines the number of usable
 * references and obtains a buffer for the new current picture.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if a non-key frame has no
 *         reference, or a negative error code from buffer allocation
 */
int ff_snow_frame_start(SnowContext *s){
    AVFrame *recycled;
    int idx, ret;

    ff_snow_release_buffer(s->avctx);

    /* rotate the reference list; the oldest frame object is reused below */
    recycled = s->last_picture[s->max_ref_frames-1];
    for (idx = s->max_ref_frames-1; idx > 0; idx--)
        s->last_picture[idx] = s->last_picture[idx-1];
    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
    if (USE_HALFPEL_PLANE && s->current_picture->data[0]) {
        ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture);
        if (ret < 0)
            return ret;
    }
    s->last_picture[0] = s->current_picture;
    s->current_picture = recycled;

    if (s->keyframe) {
        s->ref_frames = 0;
    } else {
        /* count references that hold data, stopping after a key frame */
        int usable = 0;

        while (usable < s->max_ref_frames && s->last_picture[usable]->data[0]) {
            if (usable && s->last_picture[usable-1]->key_frame)
                break;
            usable++;
        }
        s->ref_frames = usable;
        if (usable == 0) {
            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
            return AVERROR_INVALIDDATA;
        }
    }

    ret = ff_snow_get_buffer(s, s->current_picture);
    if (ret < 0)
        return ret;

    s->current_picture->key_frame = s->keyframe;

    return 0;
}
692
ff_snow_common_end(SnowContext * s)693 av_cold void ff_snow_common_end(SnowContext *s)
694 {
695 int plane_index, level, orientation, i;
696
697 av_freep(&s->spatial_dwt_buffer);
698 av_freep(&s->temp_dwt_buffer);
699 av_freep(&s->spatial_idwt_buffer);
700 av_freep(&s->temp_idwt_buffer);
701 av_freep(&s->run_buffer);
702
703 s->m.me.temp= NULL;
704 av_freep(&s->m.me.scratchpad);
705 av_freep(&s->m.me.map);
706 av_freep(&s->m.me.score_map);
707 av_freep(&s->m.sc.obmc_scratchpad);
708
709 av_freep(&s->block);
710 av_freep(&s->scratchbuf);
711 av_freep(&s->emu_edge_buffer);
712
713 for(i=0; i<MAX_REF_FRAMES; i++){
714 av_freep(&s->ref_mvs[i]);
715 av_freep(&s->ref_scores[i]);
716 if(s->last_picture[i] && s->last_picture[i]->data[0]) {
717 av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
718 }
719 av_frame_free(&s->last_picture[i]);
720 }
721
722 for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
723 for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
724 for(orientation=level ? 1 : 0; orientation<4; orientation++){
725 SubBand *b= &s->plane[plane_index].band[level][orientation];
726
727 av_freep(&b->x_coeff);
728 }
729 }
730 }
731 av_frame_free(&s->mconly_picture);
732 av_frame_free(&s->current_picture);
733 }
734