• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "libavutil/thread.h"
25 #include "avcodec.h"
26 #include "encode.h"
27 #include "me_cmp.h"
28 #include "snow_dwt.h"
29 #include "internal.h"
30 #include "snow.h"
31 #include "snowdata.h"
32 
33 #include "rangecoder.h"
34 #include "mathops.h"
35 
36 
/**
 * Combine four prediction blocks using the weights in @p obmc and either
 * reconstruct pixels from, or remove the prediction from, the slice buffer.
 *
 * For every sample the four entries block[3]..block[0] are multiplied by
 * weights taken from four regions of the obmc table and summed.
 *
 * @param obmc        weight table
 * @param obmc_stride stride of the weight table (also used to locate the
 *                    three other weight regions)
 * @param block       four source blocks, each addressed with src_stride
 * @param b_w,b_h     block dimensions
 * @param src_x       horizontal offset into the slice-buffer line
 * @param src_y       first slice-buffer line to touch
 * @param src_stride  stride of block[] and dst8
 * @param sb          slice buffer holding the IDWTELEM lines
 * @param add         non-zero: add the prediction to the line, round, clip to
 *                    0..255 and store to dst8; zero: subtract the prediction
 *                    from the line in place
 * @param dst8        8-bit output, only written when add is non-zero
 */
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
    const int half = obmc_stride >> 1;

    for (int row = 0; row < b_h; row++) {
        //FIXME ugly misuse of obmc_stride
        const uint8_t *w1 = obmc + row * obmc_stride;
        const uint8_t *w2 = w1 + half;
        const uint8_t *w3 = w1 + obmc_stride * half;
        const uint8_t *w4 = w3 + half;
        IDWTELEM *line = slice_buffer_get_line(sb, src_y + row);

        for (int col = 0; col < b_w; col++) {
            const int idx = col + row * src_stride;
            int v =  w1[col] * block[3][idx]
                   + w2[col] * block[2][idx]
                   + w3[col] * block[1][idx]
                   + w4[col] * block[0][idx];

            /* bring the weighted sum to FRAC_BITS fractional bits */
            v <<= 8 - LOG2_OBMC_MAX;
            if (FRAC_BITS != 8)
                v >>= 8 - FRAC_BITS;

            if (!add) {
                line[col + src_x] -= v;
                continue;
            }

            v += line[col + src_x];
            v  = (v + (1 << (FRAC_BITS - 1))) >> FRAC_BITS;
            /* clip to 0..255: negative -> 0, too large -> 255 after the
             * narrowing store below */
            if (v & ~255)
                v = ~(v >> 31);
            dst8[idx] = v;
        }
    }
}
69 
/**
 * Obtain a buffer for @p frame.
 *
 * When the codec is an encoder the frame is allocated 2*EDGE_WIDTH larger in
 * both directions through ff_encode_alloc_frame(); afterwards every data
 * pointer is advanced past the (chroma-shift scaled) top/left edge and the
 * frame dimensions are set back to the coded size. Otherwise the buffer comes
 * from ff_get_buffer() with AV_GET_BUFFER_FLAG_REF.
 *
 * @return 0 on success, the negative error code of the allocator otherwise
 */
int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
{
    const int is_encoder = av_codec_is_encoder(s->avctx->codec);
    int err;

    frame->width  = s->avctx->width;
    frame->height = s->avctx->height;

    if (!is_encoder) {
        err = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF);
        if (err < 0)
            return err;
        return 0;
    }

    /* encoder: request room for the edges on every side */
    frame->width  += 2 * EDGE_WIDTH;
    frame->height += 2 * EDGE_WIDTH;

    err = ff_encode_alloc_frame(s->avctx, frame);
    if (err < 0)
        return err;

    for (int plane = 0; frame->data[plane]; plane++) {
        const int v_shift = plane ? s->chroma_v_shift : 0;
        const int h_shift = plane ? s->chroma_h_shift : 0;
        int offset = (EDGE_WIDTH >> v_shift) * frame->linesize[plane] +
                     (EDGE_WIDTH >> h_shift);

        frame->data[plane] += offset;
    }
    frame->width  = s->avctx->width;
    frame->height = s->avctx->height;

    return 0;
}
99 
/**
 * Reset all adaptive coder states to MID_STATE: the per-subband state tables
 * of the first three planes as well as the header and block state tables.
 */
void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
    for (int pl = 0; pl < 3; pl++) {
        for (int lvl = 0; lvl < MAX_DECOMPOSITIONS; lvl++) {
            /* level 0 owns orientation 0, higher levels start at 1 */
            for (int orient = lvl ? 1 : 0; orient < 4; orient++) {
                SubBand *band = &s->plane[pl].band[lvl][orient];

                memset(band->state, MID_STATE, sizeof(band->state));
            }
        }
    }
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
    memset(s->block_state,  MID_STATE, sizeof(s->block_state));
}
113 
/**
 * (Re)allocate the zero-initialised block array.
 *
 * The block grid is the picture size divided by the macroblock size
 * (rounded up); each grid cell gets 1 << (2*block_max_depth) entries.
 * Any previous array is released first.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the allocation fails
 */
int ff_snow_alloc_blocks(SnowContext *s){
    const int blocks_w = AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
    const int blocks_h = AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);

    s->b_width  = blocks_w;
    s->b_height = blocks_h;

    av_free(s->block);
    s->block = av_calloc(blocks_w * blocks_h,
                         sizeof(*s->block) << (s->block_max_depth*2));

    return s->block ? 0 : AVERROR(ENOMEM);
}
128 
/**
 * Sub-pel motion compensation of one b_w x b_h block.
 *
 * Up to three filtered planes are built in local buffers (row pitch 64):
 * a horizontally filtered one, a vertically filtered one and one filtered in
 * both directions. The output is then interpolated either bilinearly from
 * four of the planes, or as a weighted mix of two planes selected through
 * the brane[] table.
 *
 * @param p      plane parameters; NULL selects the fixed (20,-5,1) filter
 * @param dst    destination, addressed with stride
 * @param src    source, pointing HTAPS_MAX/2-1 samples left of and above the
 *               block position
 * @param stride line size of src and dst
 * @param dx,dy  sub-pel offsets, both must be < 16
 */
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
    static const uint8_t weight[64]={
    8,7,6,5,4,3,2,1,
    7,7,0,0,0,0,0,1,
    6,0,6,0,0,0,2,0,
    5,0,0,5,0,3,0,0,
    4,0,0,0,4,0,0,0,
    3,0,0,5,0,3,0,0,
    2,0,6,0,0,0,2,0,
    1,7,0,0,0,0,0,1,
    };

    static const uint8_t brane[256]={
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
    };

    /* per hpel[] slot: bitmask of the filter passes needed to produce it */
    static const uint8_t needs[16]={
    0,1,0,0,
    2,4,2,0,
    0,1,0,0,
    15
    };

    int x, y;
    int sel_hi, sel_lo, need;
    int16_t filt16[64*(32+HTAPS_MAX)];      /* unrounded horizontal results */
    uint8_t planes[3][64*(32+HTAPS_MAX)];   /* [0] h, [1] v, [2] h+v        */
    int16_t *row16 = filt16;
    uint8_t *row8  = planes[0];
    const uint8_t *hpel[11];
    const int use_fast = !p || p->fast_mc;

    av_assert2(dx<16 && dy<16);
    sel_lo = brane[dx + 16*dy] & 15;
    sel_hi = brane[dx + 16*dy] >> 4;

    need = needs[sel_hi] | needs[sel_lo];
    if (p && !p->diag_mc)
        need = 15;

    /* pass 1: horizontal filter over b_h + HTAPS_MAX - 1 rows */
    if (need & 5) {
        for (y = 0; y < b_h + HTAPS_MAX - 1; y++) {
            for (x = 0; x < b_w; x++) {
                const uint8_t *t = src + x + HTAPS_MAX/2 - 4;
                const int s0 = t[3] + t[4];
                const int s1 = t[2] + t[5];
                const int s2 = t[1] + t[6];
                const int s3 = t[0] + t[7];
                int v;

                if (use_fast) {
                    v = 20*s0 - 5*s1 + s2;
                    row16[x] = v;
                    v = (v + 16) >> 5;
                } else {
                    v = p->hcoeff[0]*s0 + p->hcoeff[1]*s1 + p->hcoeff[2]*s2 + p->hcoeff[3]*s3;
                    row16[x] = v;
                    v = (v + 32) >> 6;
                }

                if (v & ~255)
                    v = ~(v >> 31);
                row8[x] = v;
            }
            row16 += 64;
            row8  += 64;
            src   += stride;
        }
        src -= stride*y;
    }
    src += HTAPS_MAX/2 - 1;
    row8 = planes[1];

    /* pass 2: vertical filter, one extra column */
    if (need & 2) {
        for (y = 0; y < b_h; y++) {
            for (x = 0; x < b_w + 1; x++) {
                const uint8_t *t = src + x + (HTAPS_MAX/2 - 4)*stride;
                const int s0 = t[3*stride] + t[4*stride];
                const int s1 = t[2*stride] + t[5*stride];
                const int s2 = t[1*stride] + t[6*stride];
                const int s3 = t[0*stride] + t[7*stride];
                int v;

                if (use_fast)
                    v = (20*s0 - 5*s1 + s2 + 16) >> 5;
                else
                    v = (p->hcoeff[0]*s0 + p->hcoeff[1]*s1 + p->hcoeff[2]*s2 + p->hcoeff[3]*s3 + 32) >> 6;

                if (v & ~255)
                    v = ~(v >> 31);
                row8[x] = v;
            }
            src  += stride;
            row8 += 64;
        }
        src -= stride*y;
    }
    src  += stride*(HTAPS_MAX/2 - 1);
    row8  = planes[2];
    row16 = filt16;

    /* pass 3: vertical filter applied to the unrounded pass-1 output */
    if (need & 4) {
        for (y = 0; y < b_h; y++) {
            for (x = 0; x < b_w; x++) {
                const int16_t *t = row16 + x + (HTAPS_MAX/2 - 4)*64;
                const int s0 = t[3*64] + t[4*64];
                const int s1 = t[2*64] + t[5*64];
                const int s2 = t[1*64] + t[6*64];
                const int s3 = t[0*64] + t[7*64];
                int v;

                if (use_fast)
                    v = (20*s0 - 5*s1 + s2 + 512) >> 10;
                else
                    v = (p->hcoeff[0]*s0 + p->hcoeff[1]*s1 + p->hcoeff[2]*s2 + p->hcoeff[3]*s3 + 2048) >> 12;

                if (v & ~255)
                    v = ~(v >> 31);
                row8[x] = v;
            }
            row16 += 64;
            row8  += 64;
        }
    }

    /* 3x3 grid of source planes, laid out with a row pitch of 4 slots */
    hpel[ 0]= src;
    hpel[ 1]= planes[0] + 64*(HTAPS_MAX/2-1);
    hpel[ 2]= src + 1;

    hpel[ 4]= planes[1];
    hpel[ 5]= planes[2];
    hpel[ 6]= planes[1] + 1;

    hpel[ 8]= src + stride;
    hpel[ 9]= hpel[1] + 64;
    hpel[10]= hpel[8] + 1;

/* slots produced by a filter pass live in the local buffers (pitch 64) */
#define MC_STRIDE(x) (needs[x] ? 64 : stride)

    if (need == 15) {
        /* bilinear interpolation between the four surrounding planes */
        const int dxy = dx / 8 + dy / 8 * 4;
        const uint8_t *in1 = hpel[dxy    ];
        const uint8_t *in2 = hpel[dxy + 1];
        const uint8_t *in3 = hpel[dxy + 4];
        const uint8_t *in4 = hpel[dxy + 5];
        const int pitch1 = MC_STRIDE(dxy);
        const int pitch2 = MC_STRIDE(dxy + 1);
        const int pitch3 = MC_STRIDE(dxy + 4);
        const int pitch4 = MC_STRIDE(dxy + 5);

        dx &= 7;
        dy &= 7;
        for (y = 0; y < b_h; y++) {
            for (x = 0; x < b_w; x++) {
                dst[x] = ((8-dx)*(8-dy)*in1[x] + dx*(8-dy)*in2[x] +
                          (8-dx)*   dy *in3[x] + dx*   dy *in4[x] + 32) >> 6;
            }
            in1 += pitch1;
            in2 += pitch2;
            in3 += pitch3;
            in4 += pitch4;
            dst += stride;
        }
    } else {
        /* weighted mix of the two planes selected by brane[] */
        const uint8_t *in1 = hpel[sel_hi];
        const uint8_t *in2 = hpel[sel_lo];
        const int pitch1 = MC_STRIDE(sel_hi);
        const int pitch2 = MC_STRIDE(sel_lo);
        const int wa = weight[((dx&7) + (8*(dy&7)))];
        const int wb = 8 - wa;

        for (y = 0; y < b_h; y++) {
            for (x = 0; x < b_w; x++) {
                dst[x] = (wa*in1[x] + wb*in2[x] + 4) >> 3;
            }
            in1 += pitch1;
            in2 += pitch2;
            dst += stride;
        }
    }
}
321 
/**
 * Produce the prediction for one block into @p dst.
 *
 * Intra blocks are filled with the block colour of the plane. Inter blocks
 * are motion compensated from the reference picture selected by block->ref;
 * when the source area reaches outside the w x h plane it is first copied
 * through emulated_edge_mc() into tmp + MB_SIZE.
 *
 * @param dst         destination, addressed with stride
 * @param tmp         scratch area for edge emulation
 * @param stride      line size of dst and of the reference plane
 * @param sx,sy       block position in the plane
 * @param b_w,b_h     block size
 * @param block       block description (type, colour, ref, mx, my)
 * @param plane_index plane being predicted
 * @param w,h         plane dimensions
 */
void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
    if (block->type & BLOCK_INTRA) {
        const unsigned color  = block->color[plane_index];
        const unsigned color4 = color*0x01010101;   /* colour in all four bytes */

        if (b_w == 32 || b_w == 16 || b_w == 8 || b_w == 4) {
            /* common widths: store four pixels at a time */
            for (int y = 0; y < b_h; y++) {
                for (int x = 0; x < b_w; x += 4)
                    *(uint32_t*)&dst[x + y*stride] = color4;
            }
        } else {
            for (int y = 0; y < b_h; y++) {
                for (int x = 0; x < b_w; x++)
                    dst[x + y*stride] = color;
            }
        }
        return;
    }

    uint8_t *src = s->last_picture[block->ref]->data[plane_index];
    const int scale = plane_index ? (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
    int mx = block->mx*scale;
    int my = block->my*scale;
    const int dx = mx&15;
    const int dy = my&15;
    const int tab_index = 3 - (b_w>>2) + (b_w>>4);
    const int sub = dy + (dx>>2);   /* index into the qpel function tables */
    int generic;

    /* integer part of the vector, moved up/left by the filter margin */
    sx += (mx>>4) - (HTAPS_MAX/2-1);
    sy += (my>>4) - (HTAPS_MAX/2-1);
    src += sx + sy*stride;
    if (   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
        || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)) {
        s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
                                 stride, stride,
                                 b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
                                 sx, sy, w, h);
        src = tmp + MB_SIZE;
    }

    av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale

    av_assert2((tab_index>=0 && tab_index<4) || b_w==32);

    /* anything the qpel functions cannot handle goes through mc_block() */
    generic =    (dx&3) || (dy&3)
              || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
              || (b_w&(b_w-1))
              || b_w == 1
              || b_h == 1
              || !s->plane[plane_index].fast_mc;

    if (generic) {
        mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
    } else if (b_w == 32) {
        /* tiled with 16x16 calls */
        for (int y = 0; y < b_h; y += 16) {
            s->h264qpel.put_h264_qpel_pixels_tab[0][sub](dst + y*stride, src + 3 + (y+3)*stride,stride);
            s->h264qpel.put_h264_qpel_pixels_tab[0][sub](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
        }
    } else if (b_w == b_h) {
        s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][sub](dst,src + 3 + 3*stride,stride);
    } else if (b_w == 2*b_h) {
        /* wide block: two square calls side by side */
        s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][sub](dst    ,src + 3       + 3*stride,stride);
        s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][sub](dst+b_h,src + 3 + b_h + 3*stride,stride);
    } else {
        /* tall block: two square calls stacked */
        av_assert2(2*b_w==b_h);
        s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][sub](dst           ,src + 3 + 3*stride           ,stride);
        s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][sub](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
    }
}
409 
410 #define mca(dx,dy,b_w)\
411 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
412     av_assert2(h==b_w);\
413     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
414 }
415 
416 mca( 0, 0,16)
417 mca( 8, 0,16)
418 mca( 0, 8,16)
419 mca( 8, 8,16)
420 mca( 0, 0,8)
421 mca( 8, 0,8)
422 mca( 0, 8,8)
423 mca( 8, 8,8)
424 
snow_static_init(void)425 static av_cold void snow_static_init(void)
426 {
427     for (int i = 0; i < MAX_REF_FRAMES; i++)
428         for (int j = 0; j < MAX_REF_FRAMES; j++)
429             ff_scale_mv_ref[i][j] = 256 * (i + 1) / (j + 1);
430 }
431 
ff_snow_common_init(AVCodecContext * avctx)432 av_cold int ff_snow_common_init(AVCodecContext *avctx){
433     static AVOnce init_static_once = AV_ONCE_INIT;
434     SnowContext *s = avctx->priv_data;
435     int width, height;
436     int i;
437 
438     s->avctx= avctx;
439     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
440     s->spatial_decomposition_count = 1;
441 
442     ff_me_cmp_init(&s->mecc, avctx);
443     ff_hpeldsp_init(&s->hdsp, avctx->flags);
444     ff_videodsp_init(&s->vdsp, 8);
445     ff_dwt_init(&s->dwt);
446     ff_h264qpel_init(&s->h264qpel, 8);
447 
448 #define mcf(dx,dy)\
449     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
450     s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
451         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
452     s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
453     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
454         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
455 
456     mcf( 0, 0)
457     mcf( 4, 0)
458     mcf( 8, 0)
459     mcf(12, 0)
460     mcf( 0, 4)
461     mcf( 4, 4)
462     mcf( 8, 4)
463     mcf(12, 4)
464     mcf( 0, 8)
465     mcf( 4, 8)
466     mcf( 8, 8)
467     mcf(12, 8)
468     mcf( 0,12)
469     mcf( 4,12)
470     mcf( 8,12)
471     mcf(12,12)
472 
473 #define mcfh(dx,dy)\
474     s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
475     s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
476         mc_block_hpel ## dx ## dy ## 16;\
477     s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
478     s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
479         mc_block_hpel ## dx ## dy ## 8;
480 
481     mcfh(0, 0)
482     mcfh(8, 0)
483     mcfh(0, 8)
484     mcfh(8, 8)
485 
486 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
487 
488     width= s->avctx->width;
489     height= s->avctx->height;
490 
491     if (!FF_ALLOCZ_TYPED_ARRAY(s->spatial_idwt_buffer, width * height) ||
492         !FF_ALLOCZ_TYPED_ARRAY(s->spatial_dwt_buffer,  width * height) ||  //FIXME this does not belong here
493         !FF_ALLOCZ_TYPED_ARRAY(s->temp_dwt_buffer,     width)          ||
494         !FF_ALLOCZ_TYPED_ARRAY(s->temp_idwt_buffer,    width)          ||
495         !FF_ALLOCZ_TYPED_ARRAY(s->run_buffer, ((width + 1) >> 1) * ((height + 1) >> 1)))
496         return AVERROR(ENOMEM);
497 
498     for(i=0; i<MAX_REF_FRAMES; i++) {
499         s->last_picture[i] = av_frame_alloc();
500         if (!s->last_picture[i])
501             return AVERROR(ENOMEM);
502     }
503 
504     s->mconly_picture = av_frame_alloc();
505     s->current_picture = av_frame_alloc();
506     if (!s->mconly_picture || !s->current_picture)
507         return AVERROR(ENOMEM);
508 
509     ff_thread_once(&init_static_once, snow_static_init);
510 
511     return 0;
512 }
513 
/**
 * Second-stage setup that depends on values parsed from the stream header.
 *
 * On the first call (no scratch buffer yet) a decoder obtains the buffer of
 * mconly_picture, and the scratch and edge-emulation buffers are allocated.
 * That step is all-or-nothing: if either buffer cannot be allocated, whatever
 * this call acquired is released again so that a later call starts from a
 * clean state instead of skipping the allocation with emu_edge_buffer still
 * NULL.
 *
 * Afterwards the geometry of every subband of every plane is derived from
 * the picture size and spatial_decomposition_count, and each subband's
 * x_coeff array is reallocated.
 *
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure,
 *         AVERROR_INVALIDDATA if a decoder sees a pixel format different
 *         from the one of mconly_picture, or the error of ff_get_buffer()
 */
int ff_snow_common_init_after_header(AVCodecContext *avctx) {
    SnowContext *s = avctx->priv_data;
    int plane_index, level, orientation;
    int ret, emu_buf_size;

    if(!s->scratchbuf) {
        const int is_decoder = av_codec_is_decoder(avctx->codec);
        int line_size;

        if (is_decoder) {
            if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
                                     AV_GET_BUFFER_FLAG_REF)) < 0)
                return ret;
        }

        line_size    = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256);
        emu_buf_size = line_size * (2 * MB_SIZE + HTAPS_MAX - 1);
        if (!FF_ALLOCZ_TYPED_ARRAY(s->scratchbuf,      line_size * 7 * MB_SIZE) ||
            !FF_ALLOCZ_TYPED_ARRAY(s->emu_edge_buffer, emu_buf_size)) {
            /* Roll back: scratchbuf doubles as the "already initialised"
             * marker, so it must not survive a partial failure. */
            av_freep(&s->scratchbuf);
            if (is_decoder)
                av_frame_unref(s->mconly_picture);
            return AVERROR(ENOMEM);
        }
    }

    if (av_codec_is_decoder(avctx->codec) &&
        s->mconly_picture->format != avctx->pix_fmt) {
        av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
        return AVERROR_INVALIDDATA;
    }

    for(plane_index=0; plane_index < s->nb_planes; plane_index++){
        int w= s->avctx->width;
        int h= s->avctx->height;

        if(plane_index){
            w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
            h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
        }
        s->plane[plane_index].width = w;
        s->plane[plane_index].height= h;

        /* walk from the highest level down; w and h are halved (rounding
         * up) after each level */
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
                SubBand *b= &s->plane[plane_index].band[level][orientation];

                b->buf= s->spatial_dwt_buffer;
                b->level= level;
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
                b->width = (w + !(orientation&1))>>1;
                b->height= (h + !(orientation>1))>>1;

                b->stride_line = 1 << (s->spatial_decomposition_count - level);
                b->buf_x_offset = 0;
                b->buf_y_offset = 0;

                /* odd orientations start in the right half ... */
                if(orientation&1){
                    b->buf += (w+1)>>1;
                    b->buf_x_offset = (w+1)>>1;
                }
                /* ... orientations 2 and 3 half a band stride further down */
                if(orientation>1){
                    b->buf += b->stride>>1;
                    b->buf_y_offset = b->stride_line >> 1;
                }
                /* same offset inside the IDWT buffer */
                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);

                if(level)
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
                //FIXME avoid this realloc
                av_freep(&b->x_coeff);
                b->x_coeff = av_calloc((b->width + 1) * b->height + 1,
                                       sizeof(*b->x_coeff));
                if (!b->x_coeff)
                    return AVERROR(ENOMEM);
            }
            w= (w+1)>>1;
            h= (h+1)>>1;
        }
    }

    return 0;
}
589 
590 #define USE_HALFPEL_PLANE 0
591 
halfpel_interpol(SnowContext * s,uint8_t * halfpel[4][4],AVFrame * frame)592 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
593     int p,x,y;
594 
595     for(p=0; p < s->nb_planes; p++){
596         int is_chroma= !!p;
597         int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
598         int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
599         int ls= frame->linesize[p];
600         uint8_t *src= frame->data[p];
601 
602         halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
603         halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
604         halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
605         if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
606             av_freep(&halfpel[1][p]);
607             av_freep(&halfpel[2][p]);
608             av_freep(&halfpel[3][p]);
609             return AVERROR(ENOMEM);
610         }
611         halfpel[1][p] += EDGE_WIDTH * (1 + ls);
612         halfpel[2][p] += EDGE_WIDTH * (1 + ls);
613         halfpel[3][p] += EDGE_WIDTH * (1 + ls);
614 
615         halfpel[0][p]= src;
616         for(y=0; y<h; y++){
617             for(x=0; x<w; x++){
618                 int i= y*ls + x;
619 
620                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
621             }
622         }
623         for(y=0; y<h; y++){
624             for(x=0; x<w; x++){
625                 int i= y*ls + x;
626 
627                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
628             }
629         }
630         src= halfpel[1][p];
631         for(y=0; y<h; y++){
632             for(x=0; x<w; x++){
633                 int i= y*ls + x;
634 
635                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
636             }
637         }
638 
639 //FIXME border!
640     }
641     return 0;
642 }
643 
/**
 * Release the reference picture in slot max_ref_frames-1 and the half-pel
 * planes stored with it. Does nothing when that picture holds no data.
 */
void ff_snow_release_buffer(AVCodecContext *avctx)
{
    SnowContext *s = avctx->priv_data;
    const int oldest = s->max_ref_frames - 1;

    if (!s->last_picture[oldest]->data[0])
        return;

    av_frame_unref(s->last_picture[oldest]);

    /* 3 filtered planes (rows 1..3) x 3 picture planes */
    for (int k = 0; k < 9; k++) {
        const int plane = k % 3;
        uint8_t **slot = &s->halfpel_plane[oldest][1 + k / 3][plane];

        if (*slot) {
            /* undo the offset applied to the stored pointer before freeing */
            av_free(*slot - EDGE_WIDTH*(1+s->current_picture->linesize[plane]));
            *slot = NULL;
        }
    }
}
658 
/**
 * Prepare the context for a new frame.
 *
 * The oldest reference is released, the reference list (and the half-pel
 * plane table) is shifted by one, the current picture becomes reference 0
 * and the frame that dropped off the end is reused as the new current
 * picture. For non-keyframes the number of usable references is counted,
 * stopping after a reference that is marked as key frame.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if a non-keyframe has no
 *         reference, or the error of the buffer/half-pel allocation
 */
int ff_snow_frame_start(SnowContext *s){
    AVFrame *recycled;
    int ret;

    ff_snow_release_buffer(s->avctx);

    recycled = s->last_picture[s->max_ref_frames-1];
    for (int k = s->max_ref_frames-1; k > 0; k--)
        s->last_picture[k] = s->last_picture[k-1];
    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
    if (USE_HALFPEL_PLANE && s->current_picture->data[0]) {
        ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture);
        if (ret < 0)
            return ret;
    }
    s->last_picture[0] = s->current_picture;
    s->current_picture = recycled;

    if (s->keyframe) {
        s->ref_frames = 0;
    } else {
        int count = 0;

        while (count < s->max_ref_frames && s->last_picture[count]->data[0]) {
            /* nothing older than a key frame is used */
            if (count && s->last_picture[count-1]->key_frame)
                break;
            count++;
        }
        s->ref_frames = count;
        if (s->ref_frames == 0) {
            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
            return AVERROR_INVALIDDATA;
        }
    }

    ret = ff_snow_get_buffer(s, s->current_picture);
    if (ret < 0)
        return ret;

    s->current_picture->key_frame = s->keyframe;

    return 0;
}
696 
ff_snow_common_end(SnowContext * s)697 av_cold void ff_snow_common_end(SnowContext *s)
698 {
699     int plane_index, level, orientation, i;
700 
701     av_freep(&s->spatial_dwt_buffer);
702     av_freep(&s->temp_dwt_buffer);
703     av_freep(&s->spatial_idwt_buffer);
704     av_freep(&s->temp_idwt_buffer);
705     av_freep(&s->run_buffer);
706 
707     s->m.me.temp= NULL;
708     av_freep(&s->m.me.scratchpad);
709     av_freep(&s->m.me.map);
710     av_freep(&s->m.me.score_map);
711     av_freep(&s->m.sc.obmc_scratchpad);
712 
713     av_freep(&s->block);
714     av_freep(&s->scratchbuf);
715     av_freep(&s->emu_edge_buffer);
716 
717     for(i=0; i<MAX_REF_FRAMES; i++){
718         av_freep(&s->ref_mvs[i]);
719         av_freep(&s->ref_scores[i]);
720         if(s->last_picture[i] && s->last_picture[i]->data[0]) {
721             av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
722         }
723         av_frame_free(&s->last_picture[i]);
724     }
725 
726     for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
727         for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
728             for(orientation=level ? 1 : 0; orientation<4; orientation++){
729                 SubBand *b= &s->plane[plane_index].band[level][orientation];
730 
731                 av_freep(&b->x_coeff);
732             }
733         }
734     }
735     av_frame_free(&s->mconly_picture);
736     av_frame_free(&s->current_picture);
737 }
738