1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "libavutil/thread.h"
25 #include "avcodec.h"
26 #include "me_cmp.h"
27 #include "snow_dwt.h"
28 #include "internal.h"
29 #include "snow.h"
30 #include "snowdata.h"
31
32 #include "rangecoder.h"
33 #include "mathops.h"
34 #include "h263.h"
35
36
/**
 * Combine four input blocks using four weight windows taken from the obmc
 * table, then either add the result to the slice buffer line (writing the
 * rounded, clipped 8-bit value to dst8) or subtract it from that line.
 *
 * @param obmc        weight table; the four windows start at offsets 0,
 *                    obmc_stride/2, obmc_stride*(obmc_stride/2) and the sum
 *                    of the latter two, relative to the current row
 * @param obmc_stride row stride of the weight table
 * @param block       four source blocks; block[3] pairs with the first window
 * @param b_w,b_h     size of the area to process
 * @param src_x       column offset inside the slice buffer line
 * @param src_y       first slice buffer line
 * @param src_stride  row stride of the blocks and of dst8
 * @param sb          slice buffer providing the IDWTELEM lines
 * @param add         non-zero: add and emit pixels; zero: subtract in place
 * @param dst8        8-bit output, only written when add is non-zero
 */
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
    const int half = obmc_stride >> 1;
    int row;

    for (row = 0; row < b_h; row++) {
        //FIXME ugly misuse of obmc_stride
        const uint8_t *win_a = obmc + row * obmc_stride;
        const uint8_t *win_b = win_a + half;
        const uint8_t *win_c = win_a + obmc_stride * half;
        const uint8_t *win_d = win_c + half;
        IDWTELEM *line = slice_buffer_get_line(sb, src_y + row);
        int col;

        for (col = 0; col < b_w; col++) {
            const int pos = col + row * src_stride;
            int acc = win_a[col] * block[3][pos]
                    + win_b[col] * block[2][pos]
                    + win_c[col] * block[1][pos]
                    + win_d[col] * block[0][pos];

            /* bring the weighted sum to FRAC_BITS fractional bits */
            acc <<= 8 - LOG2_OBMC_MAX;
            if (FRAC_BITS != 8)
                acc >>= 8 - FRAC_BITS;

            if (!add) {
                line[col + src_x] -= acc;
                continue;
            }

            acc += line[col + src_x];
            acc  = (acc + (1 << (FRAC_BITS - 1))) >> FRAC_BITS;
            /* out-of-range values: negative -> 0, too large -> 255 after
             * truncation to uint8_t */
            if (acc & ~255)
                acc = ~(acc >> 31);
            dst8[pos] = acc;
        }
    }
}
69
/**
 * Obtain a reference-counted buffer for frame via ff_get_buffer().
 *
 * When the codec is an encoder the request is enlarged by EDGE_WIDTH on every
 * side; afterwards the data pointers are advanced past that border (scaled by
 * the chroma shifts for planes other than the first) and the frame dimensions
 * are set back to the codec dimensions.
 *
 * @return 0 on success, the negative ff_get_buffer() error otherwise
 */
int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
{
    const int with_edges = av_codec_is_encoder(s->avctx->codec);
    const int padding    = with_edges ? 2 * EDGE_WIDTH : 0;
    int ret, plane;

    frame->width  = s->avctx->width  + padding;
    frame->height = s->avctx->height + padding;

    ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF);
    if (ret < 0)
        return ret;
    if (!with_edges)
        return 0;

    for (plane = 0; frame->data[plane]; plane++) {
        const int v_shift = plane ? s->chroma_v_shift : 0;
        const int h_shift = plane ? s->chroma_h_shift : 0;
        const int offset  = (EDGE_WIDTH >> v_shift) * frame->linesize[plane] +
                            (EDGE_WIDTH >> h_shift);

        frame->data[plane] += offset;
    }
    frame->width  = s->avctx->width;
    frame->height = s->avctx->height;

    return 0;
}
96
/**
 * Set every band state table of the first three planes, plus the header and
 * block state tables, to MID_STATE.
 */
void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
    int p, lvl, orient;

    for (p = 0; p < 3; p++) {
        for (lvl = 0; lvl < MAX_DECOMPOSITIONS; lvl++) {
            /* level 0 has orientations 0..3, every other level only 1..3 */
            for (orient = lvl ? 1 : 0; orient < 4; orient++) {
                SubBand *band = &s->plane[p].band[lvl][orient];

                memset(band->state, MID_STATE, sizeof(band->state));
            }
        }
    }
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
    memset(s->block_state,  MID_STATE, sizeof(s->block_state));
}
110
/**
 * (Re)allocate the zero-initialised block array.
 *
 * The block grid dimensions are the codec dimensions divided by the
 * macroblock size (rounded up) and are stored in b_width / b_height; each
 * grid cell gets room for 4^block_max_depth BlockNode entries.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the allocation fails
 */
int ff_snow_alloc_blocks(SnowContext *s){
    const int blocks_wide = AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
    const int blocks_high = AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);

    s->b_width  = blocks_wide;
    s->b_height = blocks_high;

    av_free(s->block);
    s->block = av_mallocz_array(blocks_wide * blocks_high,
                                sizeof(BlockNode) << (s->block_max_depth * 2));

    return s->block ? 0 : AVERROR(ENOMEM);
}
125
init_qexp(void)126 static av_cold void init_qexp(void){
127 int i;
128 double v=128;
129
130 for(i=0; i<QROOT; i++){
131 ff_qexp[i]= lrintf(v);
132 v *= pow(2, 1.0 / QROOT);
133 }
134 }
/**
 * Sub-pixel interpolation of one b_w x b_h block.
 *
 * The routine optionally builds up to three filtered copies of the source
 * (horizontally filtered, vertically filtered, filtered in both directions)
 * and then blends either two or four of the resulting planes into dst.
 *
 * @param p       plane supplying diag_mc, fast_mc and hcoeff[]; may be NULL,
 *                in which case the fixed (20, -5, 1) taps are used and the
 *                diag_mc override is skipped
 * @param dst     output block, rows are stride bytes apart
 * @param src     input; the filters read a window of rows/columns around the
 *                block, and the callers in this file pass a pointer that has
 *                already been moved back by HTAPS_MAX/2-1 columns and rows
 * @param stride  row stride shared by src and dst
 * @param b_w,b_h block size; the temporary planes use a fixed row stride of
 *                64 (NOTE(review): presumably b_w never exceeds 64 -- confirm
 *                against callers)
 * @param dx,dy   fractional position, each must be below 16 (asserted)
 */
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
    /* Weight of the first source in the two-source blend, in eighths;
     * indexed by (dx&7) + 8*(dy&7). */
    static const uint8_t weight[64]={
    8,7,6,5,4,3,2,1,
    7,7,0,0,0,0,0,1,
    6,0,6,0,0,0,2,0,
    5,0,0,5,0,3,0,0,
    4,0,0,0,4,0,0,0,
    3,0,0,5,0,3,0,0,
    2,0,6,0,0,0,2,0,
    1,7,0,0,0,0,0,1,
    };

    /* Indexed by dx + 16*dy. Each entry packs two hpel[] indices: the high
     * nibble (l) and the low nibble (r). 0xcc yields index 12 for both, and
     * needs[12] == 15, which selects the four-source path below. */
    static const uint8_t brane[256]={
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
    };

    /* Per hpel[] index: bitmask of the filter passes that must run for that
     * plane to be valid (1 = horizontal, 2 = vertical, 4 = both directions).
     * A non-zero entry also means the plane lives in a temporary buffer with
     * row stride 64 (see MC_STRIDE). */
    static const uint8_t needs[16]={
    0,1,0,0,
    2,4,2,0,
    0,1,0,0,
    15
    };

    int x, y, b, r, l;
    int16_t tmpIt [64*(32+HTAPS_MAX)];      /* unrounded horizontal filter output */
    uint8_t tmp2t[3][64*(32+HTAPS_MAX)];    /* [0] horizontal, [1] vertical, [2] both */
    int16_t *tmpI= tmpIt;
    uint8_t *tmp2= tmp2t[0];
    const uint8_t *hpel[11];
    av_assert2(dx<16 && dy<16);
    r= brane[dx + 16*dy]&15;
    l= brane[dx + 16*dy]>>4;

    b= needs[l] | needs[r];
    /* a plane with diag_mc unset always takes the four-source path */
    if(p && !p->diag_mc)
        b= 15;

    /* Horizontal pass. It also runs when only the "both directions" plane
     * (bit 4) is wanted, because that pass consumes tmpI. The loop covers
     * HTAPS_MAX-1 extra rows so the later vertical pass over tmpI has all
     * its taps. */
    if(b&5){
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
            for(x=0; x < b_w; x++){
                int a_1=src[x + HTAPS_MAX/2-4];
                int a0= src[x + HTAPS_MAX/2-3];
                int a1= src[x + HTAPS_MAX/2-2];
                int a2= src[x + HTAPS_MAX/2-1];
                int a3= src[x + HTAPS_MAX/2+0];
                int a4= src[x + HTAPS_MAX/2+1];
                int a5= src[x + HTAPS_MAX/2+2];
                int a6= src[x + HTAPS_MAX/2+3];
                int am=0;
                if(!p || p->fast_mc){
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
                    tmpI[x]= am;            /* keep full precision for the second pass */
                    am= (am+16)>>5;
                }else{
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
                    tmpI[x]= am;
                    am= (am+32)>>6;
                }

                /* out of 0..255: negative -> 0, too large -> ~0, i.e. 255
                 * once stored in a uint8_t */
                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            tmpI+= 64;
            tmp2+= 64;
            src += stride;
        }
        src -= stride*y;    /* rewind to the first row */
    }
    src += HTAPS_MAX/2 - 1;
    tmp2= tmp2t[1];

    /* Vertical pass straight from src; one extra column is produced because
     * hpel[6] points at tmp2t[1] + 1. */
    if(b&2){
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w+1; x++){
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
                int am=0;
                if(!p || p->fast_mc)
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
                else
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;

                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            src += stride;
            tmp2+= 64;
        }
        src -= stride*y;    /* rewind to the first row */
    }
    src += stride*(HTAPS_MAX/2 - 1);
    tmp2= tmp2t[2];
    tmpI= tmpIt;
    /* Vertical pass over the unrounded horizontal results; the larger
     * rounding constants and shifts (512>>10, 2048>>12) remove the scale of
     * both filter stages at once. */
    if(b&4){
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
                int am=0;
                if(!p || p->fast_mc)
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
                else
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            tmpI+= 64;
            tmp2+= 64;
        }
    }

    /* hpel[] is a grid with four slots per row (slots 3 and 7 are unused):
     *   0: src            1: horizontal      2: src + 1
     *   4: vertical       5: both            6: vertical + 1
     *   8: src + stride   9: horizontal + 1 row   10: src + stride + 1
     * At this point src has been advanced by HTAPS_MAX/2-1 columns and rows. */
    hpel[ 0]= src;
    hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
    hpel[ 2]= src + 1;

    hpel[ 4]= tmp2t[1];
    hpel[ 5]= tmp2t[2];
    hpel[ 6]= tmp2t[1] + 1;

    hpel[ 8]= src + stride;
    hpel[ 9]= hpel[1] + 64;
    hpel[10]= hpel[8] + 1;

/* row stride of hpel[x]: temporary planes use 64, the source uses stride */
#define MC_STRIDE(x) (needs[x] ? 64 : stride)

    if(b==15){
        /* Four-source path: weighted average of the 2x2 cell of hpel[] whose
         * top-left slot is dxy, with weights from the low three bits of
         * dx and dy (they sum to 64). */
        int dxy = dx / 8 + dy / 8 * 4;
        const uint8_t *src1 = hpel[dxy ];
        const uint8_t *src2 = hpel[dxy + 1];
        const uint8_t *src3 = hpel[dxy + 4];
        const uint8_t *src4 = hpel[dxy + 5];
        int stride1 = MC_STRIDE(dxy);
        int stride2 = MC_STRIDE(dxy + 1);
        int stride3 = MC_STRIDE(dxy + 4);
        int stride4 = MC_STRIDE(dxy + 5);
        dx&=7;
        dy&=7;
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
                         (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
            }
            src1+=stride1;
            src2+=stride2;
            src3+=stride3;
            src4+=stride4;
            dst +=stride;
        }
    }else{
        /* Two-source path: blend hpel[l] and hpel[r] with weights a and 8-a.
         * Note that this local b shadows the pass mask declared above. */
        const uint8_t *src1= hpel[l];
        const uint8_t *src2= hpel[r];
        int stride1 = MC_STRIDE(l);
        int stride2 = MC_STRIDE(r);
        int a= weight[((dx&7) + (8*(dy&7)))];
        int b= 8-a;
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
            }
            src1+=stride1;
            src2+=stride2;
            dst +=stride;
        }
    }
}
327
/**
 * Produce the prediction for one block into dst.
 *
 * Intra blocks are filled with the block's colour for the given plane.
 * Other blocks are fetched from the reference picture selected by block->ref,
 * displaced by the scaled motion vector; reads that would leave the picture
 * go through emulated_edge_mc() into tmp first. Interpolation is done either
 * by mc_block() or, when the position and block shape allow it, by the
 * H.264 qpel functions.
 *
 * @param s           codec context
 * @param dst         output block, rows are stride bytes apart
 * @param tmp         scratch memory used for edge emulation
 * @param stride      row stride of dst and of the reference plane
 * @param sx,sy       block position inside the plane
 * @param b_w,b_h     block size
 * @param block       block description (type, colour, reference, vector)
 * @param plane_index plane being predicted
 * @param w,h         plane dimensions
 */
void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
    if (block->type & BLOCK_INTRA) {
        const unsigned color  = block->color[plane_index];
        const unsigned color4 = color * 0x01010101;
        int row, col;

        switch (b_w) {
        case 32:
        case 16:
        case 8:
        case 4:
            /* these widths are written four bytes at a time */
            for (row = 0; row < b_h; row++)
                for (col = 0; col < b_w; col += 4)
                    *(uint32_t*)&dst[col + row*stride] = color4;
            break;
        default:
            for (row = 0; row < b_h; row++)
                for (col = 0; col < b_w; col++)
                    dst[col + row*stride] = color;
            break;
        }
        return;
    }

    {
        uint8_t *src = s->last_picture[block->ref]->data[plane_index];
        const int luma_scale = 2*s->mv_scale;
        const int scale      = plane_index ? (luma_scale >> s->chroma_h_shift) : luma_scale;
        const int mx = block->mx*scale;
        const int my = block->my*scale;
        const int dx = mx & 15;                 /* fractional part of the vector */
        const int dy = my & 15;
        const int tab_index = 3 - (b_w>>2) + (b_w>>4);
        const int qpel_idx  = dy + (dx>>2);
        int use_generic;

        /* integer part of the vector, minus the filter margin */
        sx  += (mx>>4) - (HTAPS_MAX/2-1);
        sy  += (my>>4) - (HTAPS_MAX/2-1);
        src += sx + sy*stride;

        if (   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)) {
            s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
                                     stride, stride,
                                     b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
                                     sx, sy, w, h);
            src = tmp + MB_SIZE;
        }

        av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale

        av_assert2((tab_index>=0 && tab_index<4) || b_w==32);

        /* The qpel functions are only used for quarter-step positions,
         * power-of-two widths above 1, aspect ratios 1:1, 1:2 or 2:1, and
         * planes with fast_mc set. */
        use_generic =    (dx&3) || (dy&3)
                      || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
                      || (b_w&(b_w-1))
                      || b_w == 1
                      || b_h == 1
                      || !s->plane[plane_index].fast_mc;

        if (use_generic) {
            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
        } else if (b_w == 32) {
            int row;

            /* built from 16x16 calls, two per band of 16 rows */
            for (row = 0; row < b_h; row += 16) {
                s->h264qpel.put_h264_qpel_pixels_tab[0][qpel_idx](dst + row*stride, src + 3 + (row+3)*stride, stride);
                s->h264qpel.put_h264_qpel_pixels_tab[0][qpel_idx](dst + 16 + row*stride, src + 19 + (row+3)*stride, stride);
            }
        } else if (b_w == b_h) {
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index][qpel_idx](dst, src + 3 + 3*stride, stride);
        } else if (b_w == 2*b_h) {
            /* wide block: two square halves side by side */
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][qpel_idx](dst, src + 3 + 3*stride, stride);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][qpel_idx](dst+b_h, src + 3 + b_h + 3*stride, stride);
        } else {
            /* tall block: two square halves stacked */
            av_assert2(2*b_w==b_h);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index][qpel_idx](dst, src + 3 + 3*stride, stride);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index][qpel_idx](dst+b_w*stride, src + 3 + 3*stride+b_w*stride, stride);
        }
    }
}
415
/*
 * mca(dx, dy, b_w) defines a static function named
 * mc_block_hpel<dx><dy><b_w>(dst, src, stride, h) that forwards to
 * mc_block() for a square b_w x b_w block at the fixed fractional position
 * (dx, dy). No plane is passed (NULL), and src is moved back by
 * HTAPS_MAX/2-1 columns and rows before the call. The generated names are
 * installed into the hdsp tables by the mcfh() macro in
 * ff_snow_common_init(), so the token-pasted spelling must stay as is.
 */
#define mca(dx,dy,b_w)\
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
    av_assert2(h==b_w);\
    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
}

/* positions (0|8, 0|8) for 16x16 and 8x8 blocks */
mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
mca( 0, 0,8)
mca( 8, 0,8)
mca( 0, 8,8)
mca( 8, 8,8)
430
snow_static_init(void)431 static av_cold void snow_static_init(void)
432 {
433 for (int i = 0; i < MAX_REF_FRAMES; i++)
434 for (int j = 0; j < MAX_REF_FRAMES; j++)
435 ff_scale_mv_ref[i][j] = 256 * (i + 1) / (j + 1);
436 init_qexp();
437 }
438
ff_snow_common_init(AVCodecContext * avctx)439 av_cold int ff_snow_common_init(AVCodecContext *avctx){
440 static AVOnce init_static_once = AV_ONCE_INIT;
441 SnowContext *s = avctx->priv_data;
442 int width, height;
443 int i;
444
445 s->avctx= avctx;
446 s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
447 s->spatial_decomposition_count = 1;
448
449 ff_me_cmp_init(&s->mecc, avctx);
450 ff_hpeldsp_init(&s->hdsp, avctx->flags);
451 ff_videodsp_init(&s->vdsp, 8);
452 ff_dwt_init(&s->dwt);
453 ff_h264qpel_init(&s->h264qpel, 8);
454
455 #define mcf(dx,dy)\
456 s->qdsp.put_qpel_pixels_tab [0][dy+dx/4]=\
457 s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
458 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
459 s->qdsp.put_qpel_pixels_tab [1][dy+dx/4]=\
460 s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
461 s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
462
463 mcf( 0, 0)
464 mcf( 4, 0)
465 mcf( 8, 0)
466 mcf(12, 0)
467 mcf( 0, 4)
468 mcf( 4, 4)
469 mcf( 8, 4)
470 mcf(12, 4)
471 mcf( 0, 8)
472 mcf( 4, 8)
473 mcf( 8, 8)
474 mcf(12, 8)
475 mcf( 0,12)
476 mcf( 4,12)
477 mcf( 8,12)
478 mcf(12,12)
479
480 #define mcfh(dx,dy)\
481 s->hdsp.put_pixels_tab [0][dy/4+dx/8]=\
482 s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
483 mc_block_hpel ## dx ## dy ## 16;\
484 s->hdsp.put_pixels_tab [1][dy/4+dx/8]=\
485 s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
486 mc_block_hpel ## dx ## dy ## 8;
487
488 mcfh(0, 0)
489 mcfh(8, 0)
490 mcfh(0, 8)
491 mcfh(8, 8)
492
493 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
494
495 width= s->avctx->width;
496 height= s->avctx->height;
497
498 if (!FF_ALLOCZ_TYPED_ARRAY(s->spatial_idwt_buffer, width * height) ||
499 !FF_ALLOCZ_TYPED_ARRAY(s->spatial_dwt_buffer, width * height) || //FIXME this does not belong here
500 !FF_ALLOCZ_TYPED_ARRAY(s->temp_dwt_buffer, width) ||
501 !FF_ALLOCZ_TYPED_ARRAY(s->temp_idwt_buffer, width) ||
502 !FF_ALLOCZ_TYPED_ARRAY(s->run_buffer, ((width + 1) >> 1) * ((height + 1) >> 1)))
503 return AVERROR(ENOMEM);
504
505 for(i=0; i<MAX_REF_FRAMES; i++) {
506 s->last_picture[i] = av_frame_alloc();
507 if (!s->last_picture[i])
508 return AVERROR(ENOMEM);
509 }
510
511 s->mconly_picture = av_frame_alloc();
512 s->current_picture = av_frame_alloc();
513 if (!s->mconly_picture || !s->current_picture)
514 return AVERROR(ENOMEM);
515
516 ff_thread_once(&init_static_once, snow_static_init);
517
518 return 0;
519 }
520
/**
 * Second-stage setup that depends on values set after ff_snow_common_init()
 * (number of planes, chroma shifts, decomposition count).
 *
 * On the first call (scratchbuf not yet allocated) it fetches a buffer for
 * mconly_picture and allocates scratchbuf and emu_edge_buffer, sized from
 * that picture's first linesize. It then fills in the geometry of every
 * plane and sub-band and reallocates each band's x_coeff array.
 *
 * @return 0 on success, a negative error code otherwise
 *         (the ff_get_buffer() error, AVERROR(ENOMEM), or
 *         AVERROR_INVALIDDATA if the pixel format no longer matches)
 */
int ff_snow_common_init_after_header(AVCodecContext *avctx) {
    SnowContext *s = avctx->priv_data;
    int pi, lvl, orient;
    int ret, emu_buf_size;

    if (!s->scratchbuf) {
        int row_size;

        ret = ff_get_buffer(s->avctx, s->mconly_picture, AV_GET_BUFFER_FLAG_REF);
        if (ret < 0)
            return ret;

        row_size     = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256);
        emu_buf_size = row_size * (2 * MB_SIZE + HTAPS_MAX - 1);
        if (!FF_ALLOCZ_TYPED_ARRAY(s->scratchbuf, row_size * 7 * MB_SIZE))
            return AVERROR(ENOMEM);
        if (!FF_ALLOCZ_TYPED_ARRAY(s->emu_edge_buffer, emu_buf_size))
            return AVERROR(ENOMEM);
    }

    if (s->mconly_picture->format != avctx->pix_fmt) {
        av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
        return AVERROR_INVALIDDATA;
    }

    for (pi = 0; pi < s->nb_planes; pi++) {
        Plane *plane = &s->plane[pi];
        int w = s->avctx->width;
        int h = s->avctx->height;

        if (pi) {
            w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
            h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
        }
        plane->width  = w;
        plane->height = h;

        /* walk from the last level down to level 0, halving w and h each time */
        for (lvl = s->spatial_decomposition_count - 1; lvl >= 0; lvl--) {
            const int shift = s->spatial_decomposition_count - lvl;

            for (orient = lvl ? 1 : 0; orient < 4; orient++) {
                SubBand *b = &plane->band[lvl][orient];
                int offset = 0;     /* element offset into the (i)dwt buffer */

                b->level       = lvl;
                b->stride      = plane->width << shift;
                b->width       = (w + !(orient & 1)) >> 1;
                b->height      = (h + !(orient > 1)) >> 1;
                b->stride_line = 1 << shift;
                b->buf_x_offset = 0;
                b->buf_y_offset = 0;

                if (orient & 1) {
                    offset         += (w + 1) >> 1;
                    b->buf_x_offset = (w + 1) >> 1;
                }
                if (orient > 1) {
                    offset         += b->stride >> 1;
                    b->buf_y_offset = b->stride_line >> 1;
                }
                /* both views of the band start at the same element offset */
                b->buf  = s->spatial_dwt_buffer  + offset;
                b->ibuf = s->spatial_idwt_buffer + offset;

                if (lvl)
                    b->parent = &plane->band[lvl - 1][orient];

                //FIXME avoid this realloc
                av_freep(&b->x_coeff);
                b->x_coeff = av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
                if (!b->x_coeff)
                    return AVERROR(ENOMEM);
            }
            w = (w + 1) >> 1;
            h = (h + 1) >> 1;
        }
    }

    return 0;
}
591
592 #define USE_HALFPEL_PLANE 0
593
halfpel_interpol(SnowContext * s,uint8_t * halfpel[4][4],AVFrame * frame)594 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
595 int p,x,y;
596
597 for(p=0; p < s->nb_planes; p++){
598 int is_chroma= !!p;
599 int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width, s->chroma_h_shift) : s->avctx->width;
600 int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
601 int ls= frame->linesize[p];
602 uint8_t *src= frame->data[p];
603
604 halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
605 halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
606 halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
607 if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
608 av_freep(&halfpel[1][p]);
609 av_freep(&halfpel[2][p]);
610 av_freep(&halfpel[3][p]);
611 return AVERROR(ENOMEM);
612 }
613 halfpel[1][p] += EDGE_WIDTH * (1 + ls);
614 halfpel[2][p] += EDGE_WIDTH * (1 + ls);
615 halfpel[3][p] += EDGE_WIDTH * (1 + ls);
616
617 halfpel[0][p]= src;
618 for(y=0; y<h; y++){
619 for(x=0; x<w; x++){
620 int i= y*ls + x;
621
622 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
623 }
624 }
625 for(y=0; y<h; y++){
626 for(x=0; x<w; x++){
627 int i= y*ls + x;
628
629 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
630 }
631 }
632 src= halfpel[1][p];
633 for(y=0; y<h; y++){
634 for(x=0; x<w; x++){
635 int i= y*ls + x;
636
637 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
638 }
639 }
640
641 //FIXME border!
642 }
643 return 0;
644 }
645
/**
 * Drop the last reference picture in use (index max_ref_frames - 1) if it
 * holds data, and free the half-pel planes stored for that slot.
 */
void ff_snow_release_buffer(AVCodecContext *avctx)
{
    SnowContext *s = avctx->priv_data;
    const int last = s->max_ref_frames - 1;
    int k;

    if (!s->last_picture[last]->data[0])
        return;

    av_frame_unref(s->last_picture[last]);

    /* k walks halfpel_plane[last][1..3][0..2] */
    for (k = 0; k < 9; k++) {
        const int plane = k % 3;
        uint8_t **slot  = &s->halfpel_plane[last][1 + k/3][plane];

        if (!*slot)
            continue;
        /* step back by the same amount halfpel_interpol() added to the
         * pointer (using the current picture's linesize) */
        av_free(*slot - EDGE_WIDTH*(1+s->current_picture->linesize[plane]));
        *slot = NULL;
    }
}
660
/**
 * Rotate the picture holders at the start of a frame.
 *
 * The last reference slot is released, every reference moves one slot down,
 * the previous current picture becomes reference 0 and the released holder
 * is reused as the new current picture, for which a buffer is requested.
 * For non-keyframes ref_frames is set to the number of usable references.
 *
 * @return 0 on success, a negative error code otherwise
 *         (AVERROR_INVALIDDATA when a non-keyframe has no reference)
 */
int ff_snow_frame_start(SnowContext *s){
    AVFrame *recycled;
    int idx, ret;

    ff_snow_release_buffer(s->avctx);

    recycled = s->last_picture[s->max_ref_frames-1];
    for (idx = s->max_ref_frames - 1; idx > 0; idx--)
        s->last_picture[idx] = s->last_picture[idx-1];
    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);

    if (USE_HALFPEL_PLANE && s->current_picture->data[0]) {
        ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture);
        if (ret < 0)
            return ret;
    }
    s->last_picture[0] = s->current_picture;
    s->current_picture = recycled;

    if (s->keyframe) {
        s->ref_frames = 0;
    } else {
        int count = 0;

        /* count references that hold data, stopping right after one that
         * is flagged as a key frame */
        while (count < s->max_ref_frames && s->last_picture[count]->data[0]) {
            if (count && s->last_picture[count-1]->key_frame)
                break;
            count++;
        }
        s->ref_frames = count;
        if (s->ref_frames == 0) {
            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
            return AVERROR_INVALIDDATA;
        }
    }

    ret = ff_snow_get_buffer(s, s->current_picture);
    if (ret < 0)
        return ret;

    s->current_picture->key_frame = s->keyframe;

    return 0;
}
698
ff_snow_common_end(SnowContext * s)699 av_cold void ff_snow_common_end(SnowContext *s)
700 {
701 int plane_index, level, orientation, i;
702
703 av_freep(&s->spatial_dwt_buffer);
704 av_freep(&s->temp_dwt_buffer);
705 av_freep(&s->spatial_idwt_buffer);
706 av_freep(&s->temp_idwt_buffer);
707 av_freep(&s->run_buffer);
708
709 s->m.me.temp= NULL;
710 av_freep(&s->m.me.scratchpad);
711 av_freep(&s->m.me.map);
712 av_freep(&s->m.me.score_map);
713 av_freep(&s->m.sc.obmc_scratchpad);
714
715 av_freep(&s->block);
716 av_freep(&s->scratchbuf);
717 av_freep(&s->emu_edge_buffer);
718
719 for(i=0; i<MAX_REF_FRAMES; i++){
720 av_freep(&s->ref_mvs[i]);
721 av_freep(&s->ref_scores[i]);
722 if(s->last_picture[i] && s->last_picture[i]->data[0]) {
723 av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
724 }
725 av_frame_free(&s->last_picture[i]);
726 }
727
728 for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
729 for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
730 for(orientation=level ? 1 : 0; orientation<4; orientation++){
731 SubBand *b= &s->plane[plane_index].band[level][orientation];
732
733 av_freep(&b->x_coeff);
734 }
735 }
736 }
737 av_frame_free(&s->mconly_picture);
738 av_frame_free(&s->current_picture);
739 }
740