;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;

;

%include "third_party/x86inc/x86inc.asm"

SECTION_RODATA
; Every entry below is exactly 16 bytes, i.e. one XMM register wide.

; Sixteen bytes of 1 (not referenced by the predictors visible in this file).
pb_1:     times 16 db 1

; Rounding terms for the two-edge DC predictors: pw_N is added before the
; shift that divides the sum of N above samples plus N left samples by 2*N.
pw_4:     times 8 dw 4
pw_8:     times 8 dw 8
pw_16:    times 8 dw 16
pw_32:    times 8 dw 32

; Fill value stored by the dc_128 predictors.
dc_128:   times 16 db 128

; Rounding terms for the single-edge (top-only / left-only) DC predictors:
; pw2_N holds N/2 and is added before the shift that divides by N.
pw2_4:    times 8 dw 2
pw2_8:    times 8 dw 4
pw2_16:   times 8 dw 8
pw2_32:   times 8 dw 16

SECTION .text

;------------------------------------------------------------------------------
; dc_predictor_4x4(dst, stride, above, left)
; Fills the 4x4 byte block at dst (rows `stride` bytes apart) with
;   dc = (sum(above[0..3]) + sum(left[0..3]) + 4) >> 3
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset
    GET_GOT         goffsetq

    movd            m0, [aboveq]            ; four samples from the row above
    movd            m2, [leftq]             ; four samples from the left column
    punpckldq       m0, m2                  ; low qword = above | left
    pxor            m1, m1
    psadbw          m0, m1                  ; SAD against zero = sum of the 8 bytes
    paddw           m0, [GLOBAL(pw_4)]      ; rounding term
    psraw           m0, 3
    pshuflw         m0, m0, 0x0             ; replicate dc across the 4 low words
    packuswb        m0, m0                  ; words -> bytes
    movd            [dstq], m0              ; row 0
    movd            [dstq+strideq], m0      ; row 1
    lea             dstq, [dstq+strideq*2]
    movd            [dstq], m0              ; row 2
    movd            [dstq+strideq], m0      ; row 3

    RESTORE_GOT
    RET

;------------------------------------------------------------------------------
; dc_left_predictor_4x4(dst, stride, above, left)
; Fills the 4x4 byte block at dst with
;   dc = (left[0] + left[1] + left[2] + left[3] + 2) >> 2
; `above` is never read.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset
    movifnidn       leftq, leftmp           ; cglobal auto-loads only dst and stride
    GET_GOT         goffsetq

    movd            m0, [leftq]
    pxor            m1, m1
    psadbw          m0, m1                  ; low word = sum of the 4 left bytes
    paddw           m0, [GLOBAL(pw2_4)]     ; rounding term
    psraw           m0, 2
    pshuflw         m0, m0, 0x0             ; replicate dc across the 4 low words
    packuswb        m0, m0                  ; words -> bytes
    movd            [dstq], m0              ; row 0
    movd            [dstq+strideq], m0      ; row 1
    lea             dstq, [dstq+strideq*2]
    movd            [dstq], m0              ; row 2
    movd            [dstq+strideq], m0      ; row 3

    RESTORE_GOT
    RET

;------------------------------------------------------------------------------
; dc_top_predictor_4x4(dst, stride, above, left)
; Fills the 4x4 byte block at dst with
;   dc = (above[0] + above[1] + above[2] + above[3] + 2) >> 2
; `left` is never read.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_top_predictor_4x4, 3, 5, 2, dst, stride, above, left, goffset
    GET_GOT         goffsetq

    movd            m0, [aboveq]
    pxor            m1, m1
    psadbw          m0, m1                  ; low word = sum of the 4 above bytes
    paddw           m0, [GLOBAL(pw2_4)]     ; rounding term
    psraw           m0, 2
    pshuflw         m0, m0, 0x0             ; replicate dc across the 4 low words
    packuswb        m0, m0                  ; words -> bytes
    movd            [dstq], m0              ; row 0
    movd            [dstq+strideq], m0      ; row 1
    lea             dstq, [dstq+strideq*2]
    movd            [dstq], m0              ; row 2
    movd            [dstq+strideq], m0      ; row 3

    RESTORE_GOT
    RET

;------------------------------------------------------------------------------
; dc_predictor_8x8(dst, stride, above, left)
; Fills the 8x8 byte block at dst with
;   dc = (sum(above[0..7]) + sum(left[0..7]) + 8) >> 4
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
    GET_GOT         goffsetq

    ; Both edges are read before DEFINE_ARGS renames the argument slots.
    movq            m0, [aboveq]
    movq            m2, [leftq]
    pxor            m1, m1
    DEFINE_ARGS dst, stride, stride3
    psadbw          m0, m1                  ; sum of the 8 above bytes
    psadbw          m2, m1                  ; sum of the 8 left bytes
    lea             stride3q, [strideq*3]
    paddw           m0, m2
    paddw           m0, [GLOBAL(pw_8)]      ; rounding term
    psraw           m0, 4
    punpcklbw       m0, m0                  ; low word = dc in both of its bytes
    pshuflw         m0, m0, 0x0             ; low qword = 8 copies of dc
    movq            [dstq], m0              ; rows 0-3
    movq            [dstq+strideq], m0
    movq            [dstq+strideq*2], m0
    movq            [dstq+stride3q], m0
    lea             dstq, [dstq+strideq*4]
    movq            [dstq], m0              ; rows 4-7
    movq            [dstq+strideq], m0
    movq            [dstq+strideq*2], m0
    movq            [dstq+stride3q], m0

    RESTORE_GOT
    RET

;------------------------------------------------------------------------------
; dc_top_predictor_8x8(dst, stride, above, left)
; Fills the 8x8 byte block at dst with
;   dc = (sum(above[0..7]) + 4) >> 3
; `left` is never read.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset
    GET_GOT         goffsetq

    movq            m0, [aboveq]            ; read before the arg slots are renamed
    pxor            m1, m1
    DEFINE_ARGS dst, stride, stride3
    psadbw          m0, m1                  ; sum of the 8 above bytes
    lea             stride3q, [strideq*3]
    paddw           m0, [GLOBAL(pw2_8)]     ; rounding term
    psraw           m0, 3
    punpcklbw       m0, m0                  ; low word = dc in both of its bytes
    pshuflw         m0, m0, 0x0             ; low qword = 8 copies of dc
    movq            [dstq], m0              ; rows 0-3
    movq            [dstq+strideq], m0
    movq            [dstq+strideq*2], m0
    movq            [dstq+stride3q], m0
    lea             dstq, [dstq+strideq*4]
    movq            [dstq], m0              ; rows 4-7
    movq            [dstq+strideq], m0
    movq            [dstq+strideq*2], m0
    movq            [dstq+stride3q], m0

    RESTORE_GOT
    RET

;------------------------------------------------------------------------------
; dc_left_predictor_8x8(dst, stride, above, left)
; Fills the 8x8 byte block at dst with
;   dc = (sum(left[0..7]) + 4) >> 3
; `above` is never read.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset
    movifnidn       leftq, leftmp           ; cglobal auto-loads only dst and stride
    GET_GOT         goffsetq

    movq            m0, [leftq]             ; read before the arg slots are renamed
    pxor            m1, m1
    DEFINE_ARGS dst, stride, stride3
    psadbw          m0, m1                  ; sum of the 8 left bytes
    lea             stride3q, [strideq*3]
    paddw           m0, [GLOBAL(pw2_8)]     ; rounding term
    psraw           m0, 3
    punpcklbw       m0, m0                  ; low word = dc in both of its bytes
    pshuflw         m0, m0, 0x0             ; low qword = 8 copies of dc
    movq            [dstq], m0              ; rows 0-3
    movq            [dstq+strideq], m0
    movq            [dstq+strideq*2], m0
    movq            [dstq+stride3q], m0
    lea             dstq, [dstq+strideq*4]
    movq            [dstq], m0              ; rows 4-7
    movq            [dstq+strideq], m0
    movq            [dstq+strideq*2], m0
    movq            [dstq+stride3q], m0

    RESTORE_GOT
    RET

;------------------------------------------------------------------------------
; dc_128_predictor_4x4(dst, stride, above, left)
; Fills the 4x4 byte block at dst with the constant 128.
; Neither `above` nor `left` is read.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_128_predictor_4x4, 2, 5, 1, dst, stride, above, left, goffset
    GET_GOT         goffsetq

    DEFINE_ARGS dst, stride, stride3
    movd            m0, [GLOBAL(dc_128)]    ; four bytes of 128
    lea             stride3q, [strideq*3]
    movd            [dstq], m0              ; row 0
    movd            [dstq+strideq], m0      ; row 1
    movd            [dstq+strideq*2], m0    ; row 2
    movd            [dstq+stride3q], m0     ; row 3
    RESTORE_GOT
    RET

;------------------------------------------------------------------------------
; dc_128_predictor_8x8(dst, stride, above, left)
; Fills the 8x8 byte block at dst with the constant 128.
; Neither `above` nor `left` is read.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_128_predictor_8x8, 2, 5, 1, dst, stride, above, left, goffset
    GET_GOT         goffsetq

    DEFINE_ARGS dst, stride, stride3
    movq            m0, [GLOBAL(dc_128)]    ; eight bytes of 128
    lea             stride3q, [strideq*3]
    movq            [dstq], m0              ; rows 0-3
    movq            [dstq+strideq], m0
    movq            [dstq+strideq*2], m0
    movq            [dstq+stride3q], m0
    lea             dstq, [dstq+strideq*4]
    movq            [dstq], m0              ; rows 4-7
    movq            [dstq+strideq], m0
    movq            [dstq+strideq*2], m0
    movq            [dstq+stride3q], m0
    RESTORE_GOT
    RET

;------------------------------------------------------------------------------
; dc_predictor_16x16(dst, stride, above, left)
; Fills the 16x16 byte block at dst with
;   dc = (sum(above[0..15]) + sum(left[0..15]) + 16) >> 5
; All 16-byte loads and stores use mova (the aligned move).
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
    GET_GOT         goffsetq

    ; Both edges are read before DEFINE_ARGS renames the argument slots.
    mova            m0, [aboveq]
    mova            m2, [leftq]
    pxor            m1, m1
    DEFINE_ARGS dst, stride, stride3, cnt
    psadbw          m0, m1                  ; one partial sum per 8-byte half
    psadbw          m2, m1
    lea             stride3q, [strideq*3]
    mov             cntd, 4                 ; 4 passes x 4 rows = 16 rows
    paddw           m0, m2
    movhlps         m2, m0                  ; bring the high-half sum down
    paddw           m0, m2                  ; low word = total of all 32 samples
    paddw           m0, [GLOBAL(pw_16)]     ; rounding term
    psraw           m0, 5
    pshuflw         m0, m0, 0x0
    punpcklqdq      m0, m0                  ; dc in all 8 words
    packuswb        m0, m0                  ; dc in all 16 bytes
.loop:
    mova            [dstq], m0
    mova            [dstq+strideq], m0
    mova            [dstq+strideq*2], m0
    mova            [dstq+stride3q], m0
    lea             dstq, [dstq+strideq*4]
    dec             cntd
    jnz             .loop

    RESTORE_GOT
    REP_RET

;------------------------------------------------------------------------------
; dc_top_predictor_16x16(dst, stride, above, left)
; Fills the 16x16 byte block at dst with
;   dc = (sum(above[0..15]) + 8) >> 4
; `left` is never read, so only the first three arguments are auto-loaded
; (the same count dc_top_predictor_4x4 and dc_top_predictor_8x8 use); the
; fourth register slot is used purely as the loop counter.
; All 16-byte loads and stores use mova (the aligned move).
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_top_predictor_16x16, 3, 5, 3, dst, stride, above, left, goffset
  GET_GOT     goffsetq

  pxor                  m1, m1
  mova                  m0, [aboveq]        ; read before the arg slots are renamed
  DEFINE_ARGS dst, stride, stride3, lines4
  lea             stride3q, [strideq*3]
  mov              lines4d, 4               ; 4 passes x 4 rows = 16 rows
  psadbw                m0, m1              ; one partial sum per 8-byte half
  movhlps               m2, m0              ; only the low qword of m2 is consumed
  paddw                 m0, m2              ; low word = sum of all 16 samples
  paddw                 m0, [GLOBAL(pw2_16)] ; rounding term
  psraw                 m0, 4
  pshuflw               m0, m0, 0x0
  punpcklqdq            m0, m0              ; dc in all 8 words
  packuswb              m0, m0              ; dc in all 16 bytes
.loop:
  mova    [dstq          ], m0
  mova    [dstq+strideq  ], m0
  mova    [dstq+strideq*2], m0
  mova    [dstq+stride3q ], m0
  lea                 dstq, [dstq+strideq*4]
  dec              lines4d
  jnz .loop

  RESTORE_GOT
  REP_RET

;------------------------------------------------------------------------------
; dc_left_predictor_16x16(dst, stride, above, left)
; Fills the 16x16 byte block at dst with
;   dc = (sum(left[0..15]) + 8) >> 4
; All 16-byte loads and stores use mova (the aligned move).
; NOTE(review): `above` is among the auto-loaded arguments but is never read.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
    GET_GOT         goffsetq

    mova            m0, [leftq]             ; read before the arg slots are renamed
    pxor            m1, m1
    DEFINE_ARGS dst, stride, stride3, cnt
    psadbw          m0, m1                  ; one partial sum per 8-byte half
    lea             stride3q, [strideq*3]
    mov             cntd, 4                 ; 4 passes x 4 rows = 16 rows
    movhlps         m2, m0                  ; only the low qword of m2 is consumed
    paddw           m0, m2                  ; low word = sum of all 16 samples
    paddw           m0, [GLOBAL(pw2_16)]    ; rounding term
    psraw           m0, 4
    pshuflw         m0, m0, 0x0
    punpcklqdq      m0, m0                  ; dc in all 8 words
    packuswb        m0, m0                  ; dc in all 16 bytes
.loop:
    mova            [dstq], m0
    mova            [dstq+strideq], m0
    mova            [dstq+strideq*2], m0
    mova            [dstq+stride3q], m0
    lea             dstq, [dstq+strideq*4]
    dec             cntd
    jnz             .loop

    RESTORE_GOT
    REP_RET

;------------------------------------------------------------------------------
; dc_128_predictor_16x16(dst, stride, above, left)
; Fills the 16x16 byte block at dst with the constant 128.
; Neither edge is read and only m0 is used, so the declaration auto-loads just
; dst and stride and declares one XMM register, as dc_128_predictor_4x4 and
; dc_128_predictor_8x8 do.  The function ends in REP_RET like the other looped
; predictors in this file, because the return directly follows the loop branch
; when RESTORE_GOT expands to nothing.
; Stores use mova (the aligned move).
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_128_predictor_16x16, 2, 5, 1, dst, stride, above, left, goffset
  GET_GOT     goffsetq

  DEFINE_ARGS dst, stride, stride3, lines4
  lea             stride3q, [strideq*3]
  mov              lines4d, 4               ; 4 passes x 4 rows = 16 rows
  mova    m0,        [GLOBAL(dc_128)]       ; sixteen bytes of 128
.loop:
  mova    [dstq          ], m0
  mova    [dstq+strideq  ], m0
  mova    [dstq+strideq*2], m0
  mova    [dstq+stride3q ], m0
  lea                 dstq, [dstq+strideq*4]
  dec              lines4d
  jnz .loop
  RESTORE_GOT
  REP_RET

;------------------------------------------------------------------------------
; dc_predictor_32x32(dst, stride, above, left)
; Fills the 32x32 byte block at dst with
;   dc = (sum(above[0..31]) + sum(left[0..31]) + 32) >> 6
; All 16-byte loads and stores use mova (the aligned move).
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
    GET_GOT         goffsetq

    ; Both edges are read before DEFINE_ARGS renames the argument slots.
    mova            m0, [aboveq]
    mova            m2, [aboveq+16]
    mova            m3, [leftq]
    mova            m4, [leftq+16]
    pxor            m1, m1
    DEFINE_ARGS dst, stride, stride3, cnt
    psadbw          m0, m1                  ; per-half sums of each 16-byte chunk
    psadbw          m2, m1
    psadbw          m3, m1
    psadbw          m4, m1
    lea             stride3q, [strideq*3]
    mov             cntd, 8                 ; 8 passes x 4 rows = 32 rows
    paddw           m0, m2                  ; above total (per half)
    paddw           m3, m4                  ; left total (per half)
    paddw           m0, m3
    movhlps         m2, m0                  ; bring the high-half sum down
    paddw           m0, m2                  ; low word = total of all 64 samples
    paddw           m0, [GLOBAL(pw_32)]     ; rounding term
    psraw           m0, 6
    pshuflw         m0, m0, 0x0
    punpcklqdq      m0, m0                  ; dc in all 8 words
    packuswb        m0, m0                  ; dc in all 16 bytes
.loop:
    mova            [dstq], m0
    mova            [dstq+16], m0
    mova            [dstq+strideq], m0
    mova            [dstq+strideq+16], m0
    mova            [dstq+strideq*2], m0
    mova            [dstq+strideq*2+16], m0
    mova            [dstq+stride3q], m0
    mova            [dstq+stride3q+16], m0
    lea             dstq, [dstq+strideq*4]
    dec             cntd
    jnz             .loop

    RESTORE_GOT
    REP_RET

;------------------------------------------------------------------------------
; dc_top_predictor_32x32(dst, stride, above, left)
; Fills the 32x32 byte block at dst with
;   dc = (sum(above[0..31]) + 16) >> 5
; `left` is never read, so only the first three arguments are auto-loaded
; (the same count dc_top_predictor_4x4 and dc_top_predictor_8x8 use), and the
; XMM register count is 3 because only m0-m2 are touched.
; All 16-byte loads and stores use mova (the aligned move).
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_top_predictor_32x32, 3, 5, 3, dst, stride, above, left, goffset
  GET_GOT     goffsetq

  pxor                  m1, m1
  mova                  m0, [aboveq]        ; read before the arg slots are renamed
  mova                  m2, [aboveq+16]
  DEFINE_ARGS dst, stride, stride3, lines4
  lea             stride3q, [strideq*3]
  mov              lines4d, 8               ; 8 passes x 4 rows = 32 rows
  psadbw                m0, m1              ; per-half sums of each 16-byte chunk
  psadbw                m2, m1
  paddw                 m0, m2
  movhlps               m2, m0              ; bring the high-half sum down
  paddw                 m0, m2              ; low word = sum of all 32 samples
  paddw                 m0, [GLOBAL(pw2_32)] ; rounding term
  psraw                 m0, 5
  pshuflw               m0, m0, 0x0
  punpcklqdq            m0, m0              ; dc in all 8 words
  packuswb              m0, m0              ; dc in all 16 bytes
.loop:
  mova [dstq             ], m0
  mova [dstq          +16], m0
  mova [dstq+strideq     ], m0
  mova [dstq+strideq  +16], m0
  mova [dstq+strideq*2   ], m0
  mova [dstq+strideq*2+16], m0
  mova [dstq+stride3q    ], m0
  mova [dstq+stride3q +16], m0
  lea                 dstq, [dstq+strideq*4]
  dec              lines4d
  jnz .loop

  RESTORE_GOT
  REP_RET

;------------------------------------------------------------------------------
; dc_left_predictor_32x32(dst, stride, above, left)
; Fills the 32x32 byte block at dst with
;   dc = (sum(left[0..31]) + 16) >> 5
; All 16-byte loads and stores use mova (the aligned move).
; NOTE(review): `above` is among the auto-loaded arguments but is never read,
; and 5 XMM registers are declared while only m0-m2 are touched.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
    GET_GOT         goffsetq

    mova            m0, [leftq]             ; read before the arg slots are renamed
    mova            m2, [leftq+16]
    pxor            m1, m1
    DEFINE_ARGS dst, stride, stride3, cnt
    psadbw          m0, m1                  ; per-half sums of each 16-byte chunk
    psadbw          m2, m1
    lea             stride3q, [strideq*3]
    mov             cntd, 8                 ; 8 passes x 4 rows = 32 rows
    paddw           m0, m2
    movhlps         m2, m0                  ; bring the high-half sum down
    paddw           m0, m2                  ; low word = sum of all 32 samples
    paddw           m0, [GLOBAL(pw2_32)]    ; rounding term
    psraw           m0, 5
    pshuflw         m0, m0, 0x0
    punpcklqdq      m0, m0                  ; dc in all 8 words
    packuswb        m0, m0                  ; dc in all 16 bytes
.loop:
    mova            [dstq], m0
    mova            [dstq+16], m0
    mova            [dstq+strideq], m0
    mova            [dstq+strideq+16], m0
    mova            [dstq+strideq*2], m0
    mova            [dstq+strideq*2+16], m0
    mova            [dstq+stride3q], m0
    mova            [dstq+stride3q+16], m0
    lea             dstq, [dstq+strideq*4]
    dec             cntd
    jnz             .loop

    RESTORE_GOT
    REP_RET

;------------------------------------------------------------------------------
; dc_128_predictor_32x32(dst, stride, above, left)
; Fills the 32x32 byte block at dst with the constant 128.
; Neither edge is read and only m0 is used, so the declaration auto-loads just
; dst and stride and declares one XMM register, as dc_128_predictor_4x4 and
; dc_128_predictor_8x8 do.  The function ends in REP_RET like the other looped
; predictors in this file, because the return directly follows the loop branch
; when RESTORE_GOT expands to nothing.
; Stores use mova (the aligned move).
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_128_predictor_32x32, 2, 5, 1, dst, stride, above, left, goffset
  GET_GOT     goffsetq

  DEFINE_ARGS dst, stride, stride3, lines4
  lea             stride3q, [strideq*3]
  mov              lines4d, 8               ; 8 passes x 4 rows = 32 rows
  mova    m0,        [GLOBAL(dc_128)]       ; sixteen bytes of 128
.loop:
  mova [dstq             ], m0
  mova [dstq          +16], m0
  mova [dstq+strideq     ], m0
  mova [dstq+strideq  +16], m0
  mova [dstq+strideq*2   ], m0
  mova [dstq+strideq*2+16], m0
  mova [dstq+stride3q    ], m0
  mova [dstq+stride3q +16], m0
  lea                 dstq, [dstq+strideq*4]
  dec              lines4d
  jnz .loop
  RESTORE_GOT
  REP_RET

;------------------------------------------------------------------------------
; v_predictor_4x4(dst, stride, above)
; Copies above[0..3] into each of the 4 rows of the block at dst.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above
    movd            m0, [aboveq]            ; the row to replicate
    movd            [dstq], m0              ; row 0
    movd            [dstq+strideq], m0      ; row 1
    lea             dstq, [dstq+strideq*2]
    movd            [dstq], m0              ; row 2
    movd            [dstq+strideq], m0      ; row 3
    RET

;------------------------------------------------------------------------------
; v_predictor_8x8(dst, stride, above)
; Copies above[0..7] into each of the 8 rows of the block at dst.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above
    movq            m0, [aboveq]            ; read before the arg slots are renamed
    DEFINE_ARGS dst, stride, stride3
    lea             stride3q, [strideq*3]
    movq            [dstq], m0              ; rows 0-3
    movq            [dstq+strideq], m0
    movq            [dstq+strideq*2], m0
    movq            [dstq+stride3q], m0
    lea             dstq, [dstq+strideq*4]
    movq            [dstq], m0              ; rows 4-7
    movq            [dstq+strideq], m0
    movq            [dstq+strideq*2], m0
    movq            [dstq+stride3q], m0
    RET

;------------------------------------------------------------------------------
; v_predictor_16x16(dst, stride, above)
; Copies above[0..15] into each of the 16 rows of the block at dst.
; The load and the stores use mova (the aligned move).
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above
    mova            m0, [aboveq]            ; read before the arg slots are renamed
    DEFINE_ARGS dst, stride, stride3, cnt
    mov             cntd, 4                 ; 4 passes x 4 rows = 16 rows
    lea             stride3q, [strideq*3]
.loop:
    mova            [dstq], m0
    mova            [dstq+strideq], m0
    mova            [dstq+strideq*2], m0
    mova            [dstq+stride3q], m0
    lea             dstq, [dstq+strideq*4]
    dec             cntd
    jnz             .loop
    REP_RET

;------------------------------------------------------------------------------
; v_predictor_32x32(dst, stride, above)
; Copies above[0..31] into each of the 32 rows of the block at dst.
; The loads and the stores use mova (the aligned move).
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above
    mova            m0, [aboveq]            ; left 16 bytes of the row
    mova            m1, [aboveq+16]         ; right 16 bytes of the row
    DEFINE_ARGS dst, stride, stride3, cnt
    mov             cntd, 8                 ; 8 passes x 4 rows = 32 rows
    lea             stride3q, [strideq*3]
.loop:
    mova            [dstq], m0
    mova            [dstq+16], m1
    mova            [dstq+strideq], m0
    mova            [dstq+strideq+16], m1
    mova            [dstq+strideq*2], m0
    mova            [dstq+strideq*2+16], m1
    mova            [dstq+stride3q], m0
    mova            [dstq+stride3q+16], m1
    lea             dstq, [dstq+strideq*4]
    dec             cntd
    jnz             .loop
    REP_RET

;------------------------------------------------------------------------------
; h_predictor_4x4(dst, stride, <unused slot>, left)
; Row r of the 4x4 block at dst is filled with four copies of left[r].
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal h_predictor_4x4, 2, 4, 4, dst, stride, line, left
    movifnidn       leftq, leftmp           ; cglobal auto-loads only dst and stride
    movd            m0, [leftq]
    punpcklbw       m0, m0
    punpcklbw       m0, m0                  ; dword r = left[r] x 4
    pshufd          m1, m0, 0x1             ; low dword = left[1] x 4
    pshufd          m2, m0, 0x2             ; low dword = left[2] x 4
    pshufd          m3, m0, 0x3             ; low dword = left[3] x 4
    movd            [dstq], m0              ; row 0
    movd            [dstq+strideq], m1      ; row 1
    lea             dstq, [dstq+strideq*2]
    movd            [dstq], m2              ; row 2
    movd            [dstq+strideq], m3      ; row 3
    RET

;------------------------------------------------------------------------------
; h_predictor_8x8(dst, stride, <counter slot>, left)
; Row r of the 8x8 block at dst is filled with eight copies of left[r].
; Two passes of four rows each; the third register slot is the loop counter.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal h_predictor_8x8, 2, 5, 3, dst, stride, line, left
    movifnidn       leftq, leftmp           ; cglobal auto-loads only dst and stride
    DEFINE_ARGS dst, stride, line, left, stride3
    movq            m0, [leftq]
    lea             stride3q, [strideq*3]
    mov             lineq, -2               ; counts up to zero: 2 passes
    punpcklbw       m0, m0                  ; word r = left[r] in both bytes
.loop:
    pshuflw         m1, m0, 0x0             ; low qword = word 0 x 4
    pshuflw         m2, m0, 0x55            ; low qword = word 1 x 4
    movq            [dstq], m1
    movq            [dstq+strideq], m2
    pshuflw         m1, m0, 0xaa            ; low qword = word 2 x 4
    pshuflw         m2, m0, 0xff            ; low qword = word 3 x 4
    movq            [dstq+strideq*2], m1
    movq            [dstq+stride3q], m2
    pshufd          m0, m0, 0xe             ; move words 4-7 into the low qword
    lea             dstq, [dstq+strideq*4]  ; lea leaves the flags untouched
    inc             lineq
    jnz             .loop
    REP_RET

;------------------------------------------------------------------------------
; h_predictor_16x16(dst, stride, <counter slot>, left)
; Row r of the 16x16 block at dst is filled with sixteen copies of left[r].
; Four passes; each pass consumes 4 bytes of left and writes 4 rows.
; Row stores use mova (the aligned move).
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal h_predictor_16x16, 2, 5, 3, dst, stride, line, left
    movifnidn       leftq, leftmp           ; cglobal auto-loads only dst and stride
    DEFINE_ARGS dst, stride, line, left, stride3
    lea             stride3q, [strideq*3]
    mov             lineq, -4               ; counts up to zero: 4 passes
.loop:
    movd            m0, [leftq]
    punpcklbw       m0, m0
    punpcklbw       m0, m0                  ; dword k = left[k] x 4, k = 0..3
    pshufd          m1, m0, 0x0             ; left[0] x 16
    pshufd          m2, m0, 0x55            ; left[1] x 16
    mova            [dstq], m1
    mova            [dstq+strideq], m2
    pshufd          m1, m0, 0xaa            ; left[2] x 16
    pshufd          m2, m0, 0xff            ; left[3] x 16
    mova            [dstq+strideq*2], m1
    mova            [dstq+stride3q], m2
    lea             leftq, [leftq+4]        ; lea leaves the flags untouched
    lea             dstq, [dstq+strideq*4]
    inc             lineq
    jnz             .loop
    REP_RET

;------------------------------------------------------------------------------
; h_predictor_32x32(dst, stride, <counter slot>, left)
; Row r of the 32x32 block at dst is filled with thirty-two copies of left[r].
; Eight passes; each pass consumes 4 bytes of left and writes 4 rows, two
; 16-byte stores per row.  Row stores use mova (the aligned move).
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left
    movifnidn       leftq, leftmp           ; cglobal auto-loads only dst and stride
    DEFINE_ARGS dst, stride, line, left, stride3
    lea             stride3q, [strideq*3]
    mov             lineq, -8               ; counts up to zero: 8 passes
.loop:
    movd            m0, [leftq]
    punpcklbw       m0, m0
    punpcklbw       m0, m0                  ; dword k = left[k] x 4, k = 0..3
    pshufd          m1, m0, 0x0             ; left[0] x 16
    pshufd          m2, m0, 0x55            ; left[1] x 16
    mova            [dstq], m1
    mova            [dstq+16], m1
    mova            [dstq+strideq], m2
    mova            [dstq+strideq+16], m2
    pshufd          m1, m0, 0xaa            ; left[2] x 16
    pshufd          m2, m0, 0xff            ; left[3] x 16
    mova            [dstq+strideq*2], m1
    mova            [dstq+strideq*2+16], m1
    mova            [dstq+stride3q], m2
    mova            [dstq+stride3q+16], m2
    lea             leftq, [leftq+4]        ; lea leaves the flags untouched
    lea             dstq, [dstq+strideq*4]
    inc             lineq
    jnz             .loop
    REP_RET
