1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "libavutil/thread.h"
25 #include "avcodec.h"
26 #include "encode.h"
27 #include "me_cmp.h"
28 #include "snow_dwt.h"
29 #include "internal.h"
30 #include "snow.h"
31 #include "snowdata.h"
32
33 #include "rangecoder.h"
34 #include "mathops.h"
35
36
/**
 * Blend four weighted prediction blocks into one slice-buffer region.
 *
 * For every sample the four entries of block[] are multiplied by four
 * weights taken from obmc and summed. The sum is rescaled from
 * LOG2_OBMC_MAX to FRAC_BITS fractional bits and then either
 *  - add != 0: added to the slice-buffer sample, rounded, clipped to
 *    0..255 and stored into dst8, or
 *  - add == 0: subtracted from the slice-buffer sample in place.
 *
 * @param obmc        weight table
 * @param obmc_stride row stride of obmc; also used to derive the offsets
 *                    of the other three weight windows
 * @param block       four source blocks, indexed 0..3
 * @param b_w, b_h    size of the region to process
 * @param src_x       column offset inside each slice-buffer line
 * @param src_y       first slice-buffer line
 * @param src_stride  stride used for both block[] and dst8
 * @param sb          slice buffer holding the IDWTELEM lines
 * @param add         selects add-and-store (non-zero) or subtract (zero)
 * @param dst8        8-bit destination, written only when add != 0
 */
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h,
                              int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8)
{
    const int half_stride = obmc_stride >> 1;

    for (int row = 0; row < b_h; row++) {
        //FIXME ugly misuse of obmc_stride
        const uint8_t *weights0 = obmc + row * obmc_stride;
        const uint8_t *weights1 = weights0 + half_stride;
        const uint8_t *weights2 = weights0 + obmc_stride * half_stride;
        const uint8_t *weights3 = weights2 + half_stride;
        const int row_offset    = row * src_stride;
        IDWTELEM *line          = slice_buffer_get_line(sb, src_y + row);

        for (int col = 0; col < b_w; col++) {
            const int pos = col + row_offset;
            int acc = weights0[col] * block[3][pos]
                    + weights1[col] * block[2][pos]
                    + weights2[col] * block[1][pos]
                    + weights3[col] * block[0][pos];

            /* bring the weighted sum to FRAC_BITS fractional bits */
            acc <<= 8 - LOG2_OBMC_MAX;
            if (FRAC_BITS != 8) {
                acc >>= 8 - FRAC_BITS;
            }

            if (!add) {
                line[col + src_x] -= acc;
                continue;
            }

            acc += line[col + src_x];
            acc  = (acc + (1 << (FRAC_BITS - 1))) >> FRAC_BITS;
            /* clip to 0..255: negative -> 0, too large -> 255 after the byte store */
            if (acc & (~255))
                acc = ~(acc >> 31);
            dst8[pos] = acc;
        }
    }
}
69
/**
 * Obtain a buffer for frame, sized from the codec context.
 *
 * When the context's codec is an encoder, the frame is allocated with
 * 2*EDGE_WIDTH extra pixels in each dimension through
 * ff_encode_alloc_frame(); afterwards every data pointer is advanced past
 * the (chroma-shifted) top/left edge and the dimensions are reset to the
 * codec size. Otherwise a plain reference-counted buffer is requested via
 * ff_get_buffer().
 *
 * @return 0 on success, the negative error code of the allocator otherwise
 */
int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
{
    const int is_encoder = av_codec_is_encoder(s->avctx->codec);
    int err;

    frame->width  = s->avctx->width;
    frame->height = s->avctx->height;

    if (!is_encoder) {
        err = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF);
        if (err < 0)
            return err;
        return 0;
    }

    /* encoder: reserve an edge on every side */
    frame->width  += 2 * EDGE_WIDTH;
    frame->height += 2 * EDGE_WIDTH;

    err = ff_encode_alloc_frame(s->avctx, frame);
    if (err < 0)
        return err;

    for (int plane = 0; frame->data[plane]; plane++) {
        const int v_shift = plane ? s->chroma_v_shift : 0;
        const int h_shift = plane ? s->chroma_h_shift : 0;
        int offset = (EDGE_WIDTH >> v_shift) * frame->linesize[plane] +
                     (EDGE_WIDTH >> h_shift);

        frame->data[plane] += offset;
    }

    frame->width  = s->avctx->width;
    frame->height = s->avctx->height;

    return 0;
}
99
/**
 * Reset all coder state arrays to MID_STATE.
 *
 * Covers the per-band state of the first three planes (orientation 0 is
 * only visited at level 0) as well as the header and block state arrays.
 */
void ff_snow_reset_contexts(SnowContext *s)
{ //FIXME better initial contexts
    for (int p = 0; p < 3; p++) {
        for (int lvl = 0; lvl < MAX_DECOMPOSITIONS; lvl++) {
            const int first_orientation = lvl ? 1 : 0;

            for (int o = first_orientation; o < 4; o++) {
                memset(s->plane[p].band[lvl][o].state, MID_STATE,
                       sizeof(s->plane[p].band[lvl][o].state));
            }
        }
    }

    memset(s->header_state, MID_STATE, sizeof(s->header_state));
    memset(s->block_state,  MID_STATE, sizeof(s->block_state));
}
113
/**
 * (Re)allocate the block array for the current picture size.
 *
 * The picture dimensions are rounded up to whole units of
 * 1 << LOG2_MB_SIZE and stored in b_width / b_height. Any previous block
 * array is freed and a zeroed one is allocated, with the element size
 * scaled by 4^block_max_depth.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the allocation fails
 */
int ff_snow_alloc_blocks(SnowContext *s)
{
    const int blocks_wide = AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
    const int blocks_high = AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);

    s->b_width  = blocks_wide;
    s->b_height = blocks_high;

    av_free(s->block);
    s->block = av_calloc(blocks_wide * blocks_high,
                         sizeof(*s->block) << (s->block_max_depth * 2));

    return s->block ? 0 : AVERROR(ENOMEM);
}
128
/**
 * Motion-compensate one block with sub-pixel offset (dx, dy), each < 16.
 *
 * Up to three filtered planes are built in stack scratch memory whose rows
 * are 64 samples apart (callers presumably keep b_w within that row size;
 * not checked here):
 *   plane 0: filtered along x (also kept unrounded as 16-bit values),
 *   plane 1: filtered along y,
 *   plane 2: the unrounded x-filtered values filtered again along y.
 * Which planes are needed is looked up through brane[] / needs[]; the
 * result is then either a bilinear blend of four planes (all planes
 * needed) or a weighted blend of two planes.
 *
 * @param p      plane parameters; NULL selects the fixed 6-tap
 *               (20, -5, 1) filter, as does p->fast_mc
 * @param dst    destination, rows are stride bytes apart
 * @param src    source; the filters read HTAPS_MAX taps starting at src
 * @param stride row stride of both src and dst
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal sub-pixel offset
 * @param dy     vertical sub-pixel offset
 */
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy)
{
    static const uint8_t weight[64]={
    8,7,6,5,4,3,2,1,
    7,7,0,0,0,0,0,1,
    6,0,6,0,0,0,2,0,
    5,0,0,5,0,3,0,0,
    4,0,0,0,4,0,0,0,
    3,0,0,5,0,3,0,0,
    2,0,6,0,0,0,2,0,
    1,7,0,0,0,0,0,1,
    };

    static const uint8_t brane[256]={
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
    };

    static const uint8_t needs[16]={
    0,1,0,0,
    2,4,2,0,
    0,1,0,0,
    15
    };

    int16_t  raw_store[64*(32+HTAPS_MAX)];      /* unrounded x-filtered samples */
    uint8_t  plane_store[3][64*(32+HTAPS_MAX)]; /* the three filtered planes    */
    int16_t *raw = raw_store;
    uint8_t *out = plane_store[0];
    const uint8_t *hpel[11];
    /* loop-invariant filter selection */
    const int fixed_filter = !p || p->fast_mc;
    int row, col, need_mask, sel_lo, sel_hi;

    av_assert2(dx<16 && dy<16);
    sel_lo = brane[dx + 16*dy] & 15;
    sel_hi = brane[dx + 16*dy] >> 4;

    need_mask = needs[sel_hi] | needs[sel_lo];
    if (p && !p->diag_mc)
        need_mask = 15;

    /* pass 1: filter along x, b_h + HTAPS_MAX - 1 rows so pass 3 has its taps */
    if (need_mask & 5) {
        for (row = 0; row < b_h + HTAPS_MAX - 1; row++) {
            for (col = 0; col < b_w; col++) {
                const uint8_t *tap = src + col;
                int a_1 = tap[HTAPS_MAX/2-4];
                int a0  = tap[HTAPS_MAX/2-3];
                int a1  = tap[HTAPS_MAX/2-2];
                int a2  = tap[HTAPS_MAX/2-1];
                int a3  = tap[HTAPS_MAX/2+0];
                int a4  = tap[HTAPS_MAX/2+1];
                int a5  = tap[HTAPS_MAX/2+2];
                int a6  = tap[HTAPS_MAX/2+3];
                int am  = 0;

                if (fixed_filter) {
                    am = 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
                    raw[col] = am;
                    am = (am+16)>>5;
                } else {
                    am = p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
                    raw[col] = am;
                    am = (am+32)>>6;
                }

                if (am & (~255))
                    am = ~(am>>31);
                out[col] = am;
            }
            raw += 64;
            out += 64;
            src += stride;
        }
        src -= stride*row;
    }
    src += HTAPS_MAX/2 - 1;
    out  = plane_store[1];

    /* pass 2: filter along y, one extra column */
    if (need_mask & 2) {
        for (row = 0; row < b_h; row++) {
            for (col = 0; col < b_w+1; col++) {
                const uint8_t *tap = src + col;
                int a_1 = tap[(HTAPS_MAX/2-4)*stride];
                int a0  = tap[(HTAPS_MAX/2-3)*stride];
                int a1  = tap[(HTAPS_MAX/2-2)*stride];
                int a2  = tap[(HTAPS_MAX/2-1)*stride];
                int a3  = tap[(HTAPS_MAX/2+0)*stride];
                int a4  = tap[(HTAPS_MAX/2+1)*stride];
                int a5  = tap[(HTAPS_MAX/2+2)*stride];
                int a6  = tap[(HTAPS_MAX/2+3)*stride];
                int am  = 0;

                if (fixed_filter)
                    am = (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
                else
                    am = (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;

                if (am & (~255))
                    am = ~(am>>31);
                out[col] = am;
            }
            src += stride;
            out += 64;
        }
        src -= stride*row;
    }
    src += stride*(HTAPS_MAX/2 - 1);
    out  = plane_store[2];
    raw  = raw_store;

    /* pass 3: filter the unrounded pass-1 output along y */
    if (need_mask & 4) {
        for (row = 0; row < b_h; row++) {
            for (col = 0; col < b_w; col++) {
                const int16_t *tap = raw + col;
                int a_1 = tap[(HTAPS_MAX/2-4)*64];
                int a0  = tap[(HTAPS_MAX/2-3)*64];
                int a1  = tap[(HTAPS_MAX/2-2)*64];
                int a2  = tap[(HTAPS_MAX/2-1)*64];
                int a3  = tap[(HTAPS_MAX/2+0)*64];
                int a4  = tap[(HTAPS_MAX/2+1)*64];
                int a5  = tap[(HTAPS_MAX/2+2)*64];
                int a6  = tap[(HTAPS_MAX/2+3)*64];
                int am  = 0;

                if (fixed_filter)
                    am = (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
                else
                    am = (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;

                if (am & (~255))
                    am = ~(am>>31);
                out[col] = am;
            }
            raw += 64;
            out += 64;
        }
    }

    /* candidate planes; slots 3 and 7 are never set and never read */
    hpel[ 0] = src;
    hpel[ 1] = plane_store[0] + 64*(HTAPS_MAX/2-1);
    hpel[ 2] = src + 1;

    hpel[ 4] = plane_store[1];
    hpel[ 5] = plane_store[2];
    hpel[ 6] = plane_store[1] + 1;

    hpel[ 8] = src + stride;
    hpel[ 9] = hpel[1] + 64;
    hpel[10] = hpel[8] + 1;

#define MC_STRIDE(x) (needs[x] ? 64 : stride)

    if (need_mask == 15) {
        /* bilinear blend of the four planes surrounding (dx, dy) */
        const int dxy = dx / 8 + dy / 8 * 4;
        const uint8_t *src1 = hpel[dxy    ];
        const uint8_t *src2 = hpel[dxy + 1];
        const uint8_t *src3 = hpel[dxy + 4];
        const uint8_t *src4 = hpel[dxy + 5];
        const int stride1 = MC_STRIDE(dxy);
        const int stride2 = MC_STRIDE(dxy + 1);
        const int stride3 = MC_STRIDE(dxy + 4);
        const int stride4 = MC_STRIDE(dxy + 5);
        int w1, w2, w3, w4;

        dx &= 7;
        dy &= 7;
        w1 = (8-dx)*(8-dy);
        w2 =    dx *(8-dy);
        w3 = (8-dx)*   dy;
        w4 =    dx *   dy;

        for (row = 0; row < b_h; row++) {
            for (col = 0; col < b_w; col++) {
                dst[col] = (w1*src1[col] + w2*src2[col] +
                            w3*src3[col] + w4*src4[col] + 32)>>6;
            }
            src1 += stride1;
            src2 += stride2;
            src3 += stride3;
            src4 += stride4;
            dst  += stride;
        }
    } else {
        /* weighted blend of the two planes selected by brane[] */
        const uint8_t *src1 = hpel[sel_hi];
        const uint8_t *src2 = hpel[sel_lo];
        const int stride1 = MC_STRIDE(sel_hi);
        const int stride2 = MC_STRIDE(sel_lo);
        const int wa = weight[((dx&7) + (8*(dy&7)))];
        const int wb = 8 - wa;

        for (row = 0; row < b_h; row++) {
            for (col = 0; col < b_w; col++) {
                dst[col] = (wa*src1[col] + wb*src2[col] + 4)>>3;
            }
            src1 += stride1;
            src2 += stride2;
            dst  += stride;
        }
    }
}
321
/**
 * Produce the prediction for one block of one plane.
 *
 * Intra blocks (block->type has BLOCK_INTRA set) are filled with the
 * block's colour for this plane. All other blocks are motion-compensated
 * from s->last_picture[block->ref], going through emulated_edge_mc() when
 * the filter footprint would leave the w x h picture area.
 *
 * @param s            codec context
 * @param dst          destination, rows are stride bytes apart
 * @param tmp          scratch memory for edge emulation (tmp + MB_SIZE is used)
 * @param stride       row stride of dst and of the reference plane
 * @param sx, sy       block position before the motion vector is applied
 * @param b_w, b_h     block size
 * @param block        block description (type, colour, reference, vector)
 * @param plane_index  plane being predicted
 * @param w, h         dimensions used for the edge check
 */
void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
    if(block->type & BLOCK_INTRA){
        const unsigned color = block->color[plane_index];
        int y;

        /*
         * Fill row by row with memset() instead of storing 32-bit words
         * through a cast uint8_t pointer: the cast stores violate strict
         * aliasing and may be misaligned. The bytes written are the same as
         * long as color fits in one byte (assumed - TODO confirm against the
         * BlockNode declaration). The b_w guard keeps a non-positive width
         * from turning into a huge size_t.
         */
        if(b_w > 0){
            for(y=0; y < b_h; y++)
                memset(dst + y*stride, color, b_w);
        }
    }else{
        uint8_t *src= s->last_picture[block->ref]->data[plane_index];
        const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
        int mx= block->mx*scale;
        int my= block->my*scale;
        const int dx= mx&15;
        const int dy= my&15;
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
        /* index into the qpel function tables */
        const int qpel_index= dy+(dx>>2);

        /* move to the top-left tap of the interpolation filter */
        sx += (mx>>4) - (HTAPS_MAX/2-1);
        sy += (my>>4) - (HTAPS_MAX/2-1);
        src += sx + sy*stride;
        if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
           || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
            s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
                                     stride, stride,
                                     b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
                                     sx, sy, w, h);
            src= tmp + MB_SIZE;
        }

        av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale

        av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
        /* generic path unless the h264 qpel functions can handle this case */
        if(    (dx&3) || (dy&3)
            || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
            || (b_w&(b_w-1))
            || b_w == 1
            || b_h == 1
            || !s->plane[plane_index].fast_mc )
            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
        else if(b_w==32){
            int y;
            /* 32 wide: two 16x16 calls per 16 rows */
            for(y=0; y<b_h; y+=16){
                s->h264qpel.put_h264_qpel_pixels_tab[0][qpel_index](dst + y*stride, src + 3 + (y+3)*stride,stride);
                s->h264qpel.put_h264_qpel_pixels_tab[0][qpel_index](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
            }
        }else if(b_w==b_h)
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][qpel_index](dst,src + 3 + 3*stride,stride);
        else if(b_w==2*b_h){
            /* wide block: left and right square halves */
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][qpel_index](dst    ,src + 3       + 3*stride,stride);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][qpel_index](dst+b_h,src + 3 + b_h + 3*stride,stride);
        }else{
            /* tall block: upper and lower square halves */
            av_assert2(2*b_w==b_h);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][qpel_index](dst           ,src + 3 + 3*stride           ,stride);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][qpel_index](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
        }
    }
}
409
410 #define mca(dx,dy,b_w)\
411 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
412 av_assert2(h==b_w);\
413 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
414 }
415
416 mca( 0, 0,16)
417 mca( 8, 0,16)
418 mca( 0, 8,16)
419 mca( 8, 8,16)
420 mca( 0, 0,8)
421 mca( 8, 0,8)
422 mca( 0, 8,8)
423 mca( 8, 8,8)
424
snow_static_init(void)425 static av_cold void snow_static_init(void)
426 {
427 for (int i = 0; i < MAX_REF_FRAMES; i++)
428 for (int j = 0; j < MAX_REF_FRAMES; j++)
429 ff_scale_mv_ref[i][j] = 256 * (i + 1) / (j + 1);
430 }
431
ff_snow_common_init(AVCodecContext * avctx)432 av_cold int ff_snow_common_init(AVCodecContext *avctx){
433 static AVOnce init_static_once = AV_ONCE_INIT;
434 SnowContext *s = avctx->priv_data;
435 int width, height;
436 int i;
437
438 s->avctx= avctx;
439 s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
440 s->spatial_decomposition_count = 1;
441
442 ff_me_cmp_init(&s->mecc, avctx);
443 ff_hpeldsp_init(&s->hdsp, avctx->flags);
444 ff_videodsp_init(&s->vdsp, 8);
445 ff_dwt_init(&s->dwt);
446 ff_h264qpel_init(&s->h264qpel, 8);
447
448 #define mcf(dx,dy)\
449 s->qdsp.put_qpel_pixels_tab [0][dy+dx/4]=\
450 s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
451 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
452 s->qdsp.put_qpel_pixels_tab [1][dy+dx/4]=\
453 s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
454 s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
455
456 mcf( 0, 0)
457 mcf( 4, 0)
458 mcf( 8, 0)
459 mcf(12, 0)
460 mcf( 0, 4)
461 mcf( 4, 4)
462 mcf( 8, 4)
463 mcf(12, 4)
464 mcf( 0, 8)
465 mcf( 4, 8)
466 mcf( 8, 8)
467 mcf(12, 8)
468 mcf( 0,12)
469 mcf( 4,12)
470 mcf( 8,12)
471 mcf(12,12)
472
473 #define mcfh(dx,dy)\
474 s->hdsp.put_pixels_tab [0][dy/4+dx/8]=\
475 s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
476 mc_block_hpel ## dx ## dy ## 16;\
477 s->hdsp.put_pixels_tab [1][dy/4+dx/8]=\
478 s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
479 mc_block_hpel ## dx ## dy ## 8;
480
481 mcfh(0, 0)
482 mcfh(8, 0)
483 mcfh(0, 8)
484 mcfh(8, 8)
485
486 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
487
488 width= s->avctx->width;
489 height= s->avctx->height;
490
491 if (!FF_ALLOCZ_TYPED_ARRAY(s->spatial_idwt_buffer, width * height) ||
492 !FF_ALLOCZ_TYPED_ARRAY(s->spatial_dwt_buffer, width * height) || //FIXME this does not belong here
493 !FF_ALLOCZ_TYPED_ARRAY(s->temp_dwt_buffer, width) ||
494 !FF_ALLOCZ_TYPED_ARRAY(s->temp_idwt_buffer, width) ||
495 !FF_ALLOCZ_TYPED_ARRAY(s->run_buffer, ((width + 1) >> 1) * ((height + 1) >> 1)))
496 return AVERROR(ENOMEM);
497
498 for(i=0; i<MAX_REF_FRAMES; i++) {
499 s->last_picture[i] = av_frame_alloc();
500 if (!s->last_picture[i])
501 return AVERROR(ENOMEM);
502 }
503
504 s->mconly_picture = av_frame_alloc();
505 s->current_picture = av_frame_alloc();
506 if (!s->mconly_picture || !s->current_picture)
507 return AVERROR(ENOMEM);
508
509 ff_thread_once(&init_static_once, snow_static_init);
510
511 return 0;
512 }
513
/**
 * Second initialisation stage, run once the header values are known.
 *
 * On first use (s->scratchbuf unset) a decoder requests a buffer for
 * mconly_picture, then the scratch and edge-emulation buffers are
 * allocated. A decoder fails if the pixel format no longer matches
 * mconly_picture. Finally the geometry, buffer pointers and x_coeff array
 * of every sub-band of every plane are (re)computed.
 *
 * @return 0 on success; a negative error code from ff_get_buffer(),
 *         AVERROR(ENOMEM) or AVERROR_INVALIDDATA otherwise
 */
int ff_snow_common_init_after_header(AVCodecContext *avctx)
{
    SnowContext *s = avctx->priv_data;
    const int is_decoder_mismatch_check = av_codec_is_decoder(avctx->codec);

    if (!s->scratchbuf) {
        int line_size, emu_buf_size, ret;

        if (av_codec_is_decoder(avctx->codec)) {
            ret = ff_get_buffer(s->avctx, s->mconly_picture,
                                AV_GET_BUFFER_FLAG_REF);
            if (ret < 0)
                return ret;
        }

        /* both buffers are sized from the same per-line byte count */
        line_size    = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256);
        emu_buf_size = line_size * (2 * MB_SIZE + HTAPS_MAX - 1);
        if (!FF_ALLOCZ_TYPED_ARRAY(s->scratchbuf,      line_size * 7 * MB_SIZE) ||
            !FF_ALLOCZ_TYPED_ARRAY(s->emu_edge_buffer, emu_buf_size))
            return AVERROR(ENOMEM);
    }

    if (is_decoder_mismatch_check &&
        s->mconly_picture->format != avctx->pix_fmt) {
        av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
        return AVERROR_INVALIDDATA;
    }

    for (int plane_index = 0; plane_index < s->nb_planes; plane_index++) {
        Plane *plane = &s->plane[plane_index];
        int w = s->avctx->width;
        int h = s->avctx->height;

        if (plane_index) {
            w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
            h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
        }
        plane->width  = w;
        plane->height = h;

        /* from the last level down to level 0; w and h halve (rounding up) per step */
        for (int level = s->spatial_decomposition_count - 1; level >= 0; level--) {
            const int shift = s->spatial_decomposition_count - level;

            for (int orientation = level ? 1 : 0; orientation < 4; orientation++) {
                SubBand *b = &plane->band[level][orientation];
                const int odd_x = orientation & 1;
                const int odd_y = orientation > 1;

                b->buf    = s->spatial_dwt_buffer;
                b->level  = level;
                b->stride = plane->width << shift;
                b->width  = (w + !odd_x) >> 1;
                b->height = (h + !odd_y) >> 1;

                b->stride_line  = 1 << shift;
                b->buf_x_offset = 0;
                b->buf_y_offset = 0;

                if (odd_x) {
                    b->buf         += (w+1)>>1;
                    b->buf_x_offset = (w+1)>>1;
                }
                if (odd_y) {
                    b->buf         += b->stride>>1;
                    b->buf_y_offset = b->stride_line >> 1;
                }
                /* same element offset inside the idwt buffer */
                b->ibuf = s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);

                if (level)
                    b->parent = &plane->band[level-1][orientation];

                //FIXME avoid this realloc
                av_freep(&b->x_coeff);
                b->x_coeff = av_calloc((b->width + 1) * b->height + 1,
                                       sizeof(*b->x_coeff));
                if (!b->x_coeff)
                    return AVERROR(ENOMEM);
            }
            w = (w+1)>>1;
            h = (h+1)>>1;
        }
    }

    return 0;
}
589
590 #define USE_HALFPEL_PLANE 0
591
/**
 * Build three filtered copies of every plane of frame.
 *
 * For plane p, halfpel[0][p] is set to the frame data itself, while
 * halfpel[1..3][p] are newly allocated (linesize x (height + 2*EDGE_WIDTH)
 * bytes, pointer advanced by EDGE_WIDTH*(1+linesize)) and filled with the
 * 6-tap (20, -5, 1) filter applied along x, along y, and along y on the
 * x-filtered plane respectively. Results are stored without clipping.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails (the three
 *         buffers of the failing plane are released)
 */
static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame)
{
    for (int p = 0; p < s->nb_planes; p++) {
        const int is_chroma = !!p;
        const int w  = is_chroma ? AV_CEIL_RSHIFT(s->avctx->width , s->chroma_h_shift) : s->avctx->width;
        const int h  = is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
        const int ls = frame->linesize[p];
        const uint8_t *src = frame->data[p];

        for (int k = 1; k <= 3; k++)
            halfpel[k][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));

        if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
            for (int k = 1; k <= 3; k++)
                av_freep(&halfpel[k][p]);
            return AVERROR(ENOMEM);
        }

        /* skip the top/left border inside each allocation */
        for (int k = 1; k <= 3; k++)
            halfpel[k][p] += EDGE_WIDTH * (1 + ls);

        halfpel[0][p] = frame->data[p];

        /* filter along x */
        for (int y = 0; y < h; y++) {
            const uint8_t *in  = src + y*ls;
            uint8_t       *out = halfpel[1][p] + y*ls;

            for (int x = 0; x < w; x++)
                out[x] = (20*(in[x] + in[x+1]) - 5*(in[x-1] + in[x+2]) + (in[x-2] + in[x+3]) + 16 )>>5;
        }

        /* filter along y */
        for (int y = 0; y < h; y++) {
            const uint8_t *in  = src + y*ls;
            uint8_t       *out = halfpel[2][p] + y*ls;

            for (int x = 0; x < w; x++)
                out[x] = (20*(in[x] + in[x+ls]) - 5*(in[x-ls] + in[x+2*ls]) + (in[x-2*ls] + in[x+3*ls]) + 16 )>>5;
        }

        /* filter the x-filtered plane along y */
        src = halfpel[1][p];
        for (int y = 0; y < h; y++) {
            const uint8_t *in  = src + y*ls;
            uint8_t       *out = halfpel[3][p] + y*ls;

            for (int x = 0; x < w; x++)
                out[x] = (20*(in[x] + in[x+ls]) - 5*(in[x-ls] + in[x+2*ls]) + (in[x-2*ls] + in[x+3*ls]) + 16 )>>5;
        }

//FIXME border!
    }
    return 0;
}
643
/**
 * Release the reference picture in slot max_ref_frames-1.
 *
 * Does nothing when that picture holds no data. Otherwise the frame is
 * unreferenced and every non-NULL halfpel_plane[...][1..3][0..2] pointer of
 * that slot is freed (after undoing the EDGE_WIDTH*(1+linesize) offset,
 * with linesize taken from current_picture) and reset to NULL.
 */
void ff_snow_release_buffer(AVCodecContext *avctx)
{
    SnowContext *s = avctx->priv_data;
    const int oldest = s->max_ref_frames - 1;

    if (!s->last_picture[oldest]->data[0])
        return;

    av_frame_unref(s->last_picture[oldest]);

    for (int k = 1; k <= 3; k++) {
        for (int p = 0; p < 3; p++) {
            if (!s->halfpel_plane[oldest][k][p])
                continue;

            av_free(s->halfpel_plane[oldest][k][p] - EDGE_WIDTH*(1+s->current_picture->linesize[p]));
            s->halfpel_plane[oldest][k][p] = NULL;
        }
    }
}
658
/**
 * Rotate the picture queue and prepare a buffer for the next frame.
 *
 * The picture in the last reference slot is released and its AVFrame is
 * recycled as the new current picture; all other references move one slot
 * back and the former current picture becomes reference 0. For
 * non-keyframes the number of usable references is counted, stopping
 * after a reference that is itself a keyframe.
 *
 * @return 0 on success, AVERROR_INVALIDDATA when a non-keyframe has no
 *         reference, or the negative error code of a failing helper
 */
int ff_snow_frame_start(SnowContext *s)
{
    AVFrame *recycled;
    int ret;

    ff_snow_release_buffer(s->avctx);

    recycled = s->last_picture[s->max_ref_frames-1];
    for (int slot = s->max_ref_frames-1; slot > 0; slot--)
        s->last_picture[slot] = s->last_picture[slot-1];
    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);

    if (USE_HALFPEL_PLANE && s->current_picture->data[0]) {
        ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture);
        if (ret < 0)
            return ret;
    }

    s->last_picture[0] = s->current_picture;
    s->current_picture = recycled;

    if (s->keyframe) {
        s->ref_frames = 0;
    } else {
        int count = 0;

        while (count < s->max_ref_frames && s->last_picture[count]->data[0]) {
            /* nothing older than a keyframe may be referenced */
            if (count && s->last_picture[count-1]->key_frame)
                break;
            count++;
        }
        s->ref_frames = count;

        if (s->ref_frames == 0) {
            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
            return AVERROR_INVALIDDATA;
        }
    }

    ret = ff_snow_get_buffer(s, s->current_picture);
    if (ret < 0)
        return ret;

    s->current_picture->key_frame = s->keyframe;

    return 0;
}
696
ff_snow_common_end(SnowContext * s)697 av_cold void ff_snow_common_end(SnowContext *s)
698 {
699 int plane_index, level, orientation, i;
700
701 av_freep(&s->spatial_dwt_buffer);
702 av_freep(&s->temp_dwt_buffer);
703 av_freep(&s->spatial_idwt_buffer);
704 av_freep(&s->temp_idwt_buffer);
705 av_freep(&s->run_buffer);
706
707 s->m.me.temp= NULL;
708 av_freep(&s->m.me.scratchpad);
709 av_freep(&s->m.me.map);
710 av_freep(&s->m.me.score_map);
711 av_freep(&s->m.sc.obmc_scratchpad);
712
713 av_freep(&s->block);
714 av_freep(&s->scratchbuf);
715 av_freep(&s->emu_edge_buffer);
716
717 for(i=0; i<MAX_REF_FRAMES; i++){
718 av_freep(&s->ref_mvs[i]);
719 av_freep(&s->ref_scores[i]);
720 if(s->last_picture[i] && s->last_picture[i]->data[0]) {
721 av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
722 }
723 av_frame_free(&s->last_picture[i]);
724 }
725
726 for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
727 for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
728 for(orientation=level ? 1 : 0; orientation<4; orientation++){
729 SubBand *b= &s->plane[plane_index].band[level][orientation];
730
731 av_freep(&b->x_coeff);
732 }
733 }
734 }
735 av_frame_free(&s->mconly_picture);
736 av_frame_free(&s->current_picture);
737 }
738