• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
11
12;
13
14%include "third_party/x86inc/x86inc.asm"
15
SECTION_RODATA
; Rounding terms that the DC predictors add to their sample sums before the
; final right shift.
; pw_4 and pw_8 hold eight 16-bit words each and are added with paddw.
; pw_16 and pw_32 keep the pw_ prefix but are stored as four 32-bit dwords,
; because the 16x16 and 32x32 predictors add them with paddd.
pw_4:  times 8 dw 4     ; 4x4:   (sum + 4)  >> 3
pw_8:  times 8 dw 8     ; 8x8:   (sum + 8)  >> 4
pw_16: times 4 dd 16    ; 16x16: (sum + 16) >> 5
pw_32: times 4 dd 32    ; 32x32: (sum + 32) >> 6

SECTION .text
;------------------------------------------------------------------------------
; highbd_dc_predictor_4x4(dst, stride, above, left)
;
; Fills four rows of four 16-bit samples at dst with one value: the rounded
; mean of the four samples at [above] and the four at [left],
; i.e. (sum + 4) >> 3.
; Consecutive rows are strideq*2 bytes apart (stride is presumably counted
; in 16-bit samples -- confirm against the C prototype).
; The fifth GPR (goffset) is only handed to GET_GOT / RESTORE_GOT, which
; bracket the GLOBAL() access to pw_4.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
  GET_GOT     goffsetq

  ; --- horizontal reduction of above[0..3] + left[0..3] ---
  movq                  m1, [leftq]             ; four left samples
  movq                  m0, [aboveq]            ; four above samples
  paddw                 m0, m1                  ; lane i = above[i] + left[i]
  pshuflw               m2, m0, 0xe             ; lanes 2,3 moved down to 0,1
  paddw                 m0, m2                  ; lanes 0,1 = two partial sums
  pshuflw               m2, m0, 0x1             ; lane 1 moved down to lane 0
  paddw                 m0, m2                  ; lane 0 = sum of all 8 samples

  ; --- round, divide by 8, replicate ---
  paddw                 m0, [GLOBAL(pw_4)]
  psraw                 m0, 3
  pshuflw               m0, m0, 0x0             ; DC value in the low 4 words

  ; --- rows 0 and 1 ---
  movq    [dstq          ], m0
  movq    [dstq+strideq*2], m0
  lea                 dstq, [dstq+strideq*4]    ; step down two rows
  ; --- rows 2 and 3 ---
  movq    [dstq          ], m0
  movq    [dstq+strideq*2], m0

  RESTORE_GOT
  RET
45
;------------------------------------------------------------------------------
; highbd_dc_predictor_8x8(dst, stride, above, left)
;
; Fills eight rows of eight 16-bit samples at dst with the rounded mean of
; the eight samples at [above] and the eight at [left]: (sum + 8) >> 4.
; The horizontal sum is built with pmaddwd against a vector of ones, packing
; back to words between steps.
; After the inputs are consumed, DEFINE_ARGS renames the argument registers
; to stride3 (3 * stride) and a scratch GPR named "one".
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal highbd_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset
  GET_GOT     goffsetq

  ; --- lane-wise above[i] + left[i] ---
  mova                  m0, [aboveq]
  paddw                 m0, [leftq]

  ; above/left are no longer needed; reuse their registers
  DEFINE_ARGS dst, stride, stride3, one
  lea             stride3q, [strideq*3]
  mov                 oned, 0x00010001          ; two 16-bit ones
  movd                  m3, oned
  pshufd                m3, m3, 0x0             ; m3 = eight words of 1
  pxor                  m1, m1                  ; zero operand for the packs

  ; --- horizontal reduction: 8 words -> 4 -> 2 -> 1 ---
  pmaddwd               m0, m3                  ; 4 dword pair sums
  packssdw              m0, m1                  ; back to words, upper half 0
  pmaddwd               m0, m3                  ; 2 dword sums
  packssdw              m0, m1
  pmaddwd               m0, m3                  ; dword 0 = sum of 16 samples

  ; --- round, divide by 16, replicate to all 8 words ---
  paddw                 m0, [GLOBAL(pw_8)]
  psrlw                 m0, 4
  pshuflw               m0, m0, 0x0
  punpcklqdq            m0, m0

  ; --- rows 0..3 ---
  mova   [dstq           ], m0
  mova   [dstq+strideq*2 ], m0
  mova   [dstq+strideq*4 ], m0
  mova   [dstq+stride3q*2], m0
  lea                 dstq, [dstq+strideq*8]    ; step down four rows
  ; --- rows 4..7 ---
  mova   [dstq           ], m0
  mova   [dstq+strideq*2 ], m0
  mova   [dstq+strideq*4 ], m0
  mova   [dstq+stride3q*2], m0

  RESTORE_GOT
  RET
80
;------------------------------------------------------------------------------
; highbd_dc_predictor_16x16(dst, stride, above, left)
;
; Fills sixteen rows of sixteen 16-bit samples at dst with the rounded mean
; of the 16 samples at [above] and the 16 at [left]: (sum + 16) >> 5.
; The sum is first accumulated lane-wise in words, then widened to dwords
; for the last two folding steps.
; The store loop runs 4 times and writes 4 rows (32 bytes each) per pass.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset
  GET_GOT     goffsetq

  ; --- lane-wise sum of the four input vectors ---
  mova                  m0, [aboveq]
  mova                  m2, [leftq]
  paddw                 m0, [aboveq+16]
  paddw                 m2, [leftq+16]
  paddw                 m0, m2                  ; each lane = 4 samples summed

  ; inputs consumed; reuse their registers for stride3 and the loop counter
  DEFINE_ARGS dst, stride, stride3, rows4
  mov               rows4d, 4
  lea             stride3q, [strideq*3]
  pxor                  m1, m1                  ; zero for the unpacks

  ; --- fold 8 word lanes down to a single dword ---
  movhlps               m2, m0                  ; high 4 lanes -> low half of m2
  paddw                 m0, m2                  ; 4 word partial sums
  punpcklwd             m0, m1                  ; zero-extend them to dwords
  movhlps               m2, m0
  paddd                 m0, m2                  ; 2 dword partial sums
  punpckldq             m0, m1                  ; spread them to dwords 0 and 2
  movhlps               m2, m0
  paddd                 m0, m2                  ; dword 0 = sum of 32 samples

  ; --- round, divide by 32, replicate to all 8 words ---
  paddd                 m0, [GLOBAL(pw_16)]
  psrad                 m0, 5
  pshuflw               m0, m0, 0x0
  punpcklqdq            m0, m0

.fill_rows:
  ; row 0
  mova   [dstq              ], m0
  mova   [dstq           +16], m0
  ; row 1
  mova   [dstq+strideq*2    ], m0
  mova   [dstq+strideq*2 +16], m0
  ; row 2
  mova   [dstq+strideq*4    ], m0
  mova   [dstq+strideq*4 +16], m0
  ; row 3
  mova   [dstq+stride3q*2   ], m0
  mova   [dstq+stride3q*2+16], m0
  lea                 dstq, [dstq+strideq*8]    ; step down four rows
  dec               rows4d
  jnz .fill_rows

  RESTORE_GOT
  REP_RET
123
;------------------------------------------------------------------------------
; highbd_dc_predictor_32x32(dst, stride, above, left)
;
; Fills 32 rows of 32 16-bit samples at dst with the rounded mean of the
; 32 samples at [above] and the 32 at [left]: (sum + 32) >> 6.
; The sum is accumulated lane-wise in words, then widened to dwords for the
; last two folding steps.
; The store loop runs 8 times and writes 4 rows (64 bytes each) per pass.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal highbd_dc_predictor_32x32, 4, 5, 7, dst, stride, above, left, goffset
  GET_GOT     goffsetq

  ; --- lane-wise sum of the four "above" vectors -> m0 ---
  mova                  m0, [aboveq]
  mova                  m2, [aboveq+16]
  mova                  m3, [aboveq+32]
  mova                  m4, [aboveq+48]
  paddw                 m0, m2
  paddw                 m3, m4
  paddw                 m0, m3

  ; --- lane-wise sum of the four "left" vectors -> m2 ---
  mova                  m2, [leftq]
  mova                  m4, [leftq+16]
  mova                  m5, [leftq+32]
  mova                  m6, [leftq+48]
  paddw                 m2, m4
  paddw                 m5, m6
  paddw                 m2, m5

  paddw                 m0, m2                  ; each lane = 8 samples summed

  ; inputs consumed; reuse their registers for stride3 and the loop counter
  DEFINE_ARGS dst, stride, stride3, rows4
  mov               rows4d, 8
  lea             stride3q, [strideq*3]
  pxor                  m1, m1                  ; zero for the unpacks

  ; --- fold 8 word lanes down to a single dword ---
  movhlps               m2, m0                  ; high 4 lanes -> low half of m2
  paddw                 m0, m2                  ; 4 word partial sums
  punpcklwd             m0, m1                  ; zero-extend them to dwords
  movhlps               m2, m0
  paddd                 m0, m2                  ; 2 dword partial sums
  punpckldq             m0, m1                  ; spread them to dwords 0 and 2
  movhlps               m2, m0
  paddd                 m0, m2                  ; dword 0 = sum of 64 samples

  ; --- round, divide by 64, replicate to all 8 words ---
  paddd                 m0, [GLOBAL(pw_32)]
  psrad                 m0, 6
  pshuflw               m0, m0, 0x0
  punpcklqdq            m0, m0

.fill_rows:
  ; row 0
  mova [dstq               ], m0
  mova [dstq          +16  ], m0
  mova [dstq          +32  ], m0
  mova [dstq          +48  ], m0
  ; row 1
  mova [dstq+strideq*2     ], m0
  mova [dstq+strideq*2+16  ], m0
  mova [dstq+strideq*2+32  ], m0
  mova [dstq+strideq*2+48  ], m0
  ; row 2
  mova [dstq+strideq*4     ], m0
  mova [dstq+strideq*4+16  ], m0
  mova [dstq+strideq*4+32  ], m0
  mova [dstq+strideq*4+48  ], m0
  ; row 3
  mova [dstq+stride3q*2    ], m0
  mova [dstq+stride3q*2 +16], m0
  mova [dstq+stride3q*2 +32], m0
  mova [dstq+stride3q*2 +48], m0
  lea                 dstq, [dstq+strideq*8]    ; step down four rows
  dec               rows4d
  jnz .fill_rows

  RESTORE_GOT
  REP_RET
182
;------------------------------------------------------------------------------
; highbd_v_predictor_4x4(dst, stride, above)
;
; Copies the 8 bytes (four 16-bit samples) at [above] into each of four
; destination rows.  Rows are strideq*2 bytes apart.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above
  movq                  m0, [aboveq]            ; the row to replicate
  ; rows 0 and 1
  movq    [dstq          ], m0
  movq    [dstq+strideq*2], m0
  lea                 dstq, [dstq+strideq*4]    ; step down two rows
  ; rows 2 and 3
  movq    [dstq          ], m0
  movq    [dstq+strideq*2], m0
  RET
192
;------------------------------------------------------------------------------
; highbd_v_predictor_8x8(dst, stride, above)
;
; Copies the 16 bytes (eight 16-bit samples) at [above] into each of eight
; destination rows.  Rows are strideq*2 bytes apart.
; Once the row is loaded, the register that held "above" is renamed to
; stride3 and set to 3 * stride.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal highbd_v_predictor_8x8, 3, 3, 1, dst, stride, above
  mova                  m0, [aboveq]            ; the row to replicate
  DEFINE_ARGS dst, stride, stride3              ; "above" is dead from here on
  lea             stride3q, [strideq*3]
  ; rows 0..3
  mova   [dstq           ], m0
  mova   [dstq+strideq*2 ], m0
  mova   [dstq+strideq*4 ], m0
  mova   [dstq+stride3q*2], m0
  lea                 dstq, [dstq+strideq*8]    ; step down four rows
  ; rows 4..7
  mova   [dstq           ], m0
  mova   [dstq+strideq*2 ], m0
  mova   [dstq+strideq*4 ], m0
  mova   [dstq+stride3q*2], m0
  RET
208
;------------------------------------------------------------------------------
; highbd_v_predictor_16x16(dst, stride, above)
;
; Copies the 32 bytes (sixteen 16-bit samples) at [above] into each of
; sixteen destination rows.  Rows are strideq*2 bytes apart.
; The store loop runs 4 times and writes 4 rows per pass.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal highbd_v_predictor_16x16, 3, 4, 2, dst, stride, above
  mova                  m1, [aboveq+16]         ; samples 8..15
  mova                  m0, [aboveq]            ; samples 0..7
  DEFINE_ARGS dst, stride, stride3, rows4       ; "above" is dead from here on
  mov               rows4d, 4
  lea             stride3q, [strideq*3]
.copy_rows:
  ; row 0
  mova    [dstq              ], m0
  mova    [dstq           +16], m1
  ; row 1
  mova    [dstq+strideq*2    ], m0
  mova    [dstq+strideq*2 +16], m1
  ; row 2
  mova    [dstq+strideq*4    ], m0
  mova    [dstq+strideq*4 +16], m1
  ; row 3
  mova    [dstq+stride3q*2   ], m0
  mova    [dstq+stride3q*2+16], m1
  lea                 dstq, [dstq+strideq*8]    ; step down four rows
  dec               rows4d
  jnz .copy_rows
  REP_RET
229
;------------------------------------------------------------------------------
; highbd_v_predictor_32x32(dst, stride, above)
;
; Copies the 64 bytes (thirty-two 16-bit samples) at [above] into each of
; 32 destination rows.  Rows are strideq*2 bytes apart.
; The store loop runs 8 times and writes 4 rows per pass.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
  mova                  m3, [aboveq+48]         ; samples 24..31
  mova                  m2, [aboveq+32]         ; samples 16..23
  mova                  m1, [aboveq+16]         ; samples  8..15
  mova                  m0, [aboveq]            ; samples  0..7
  DEFINE_ARGS dst, stride, stride3, rows4       ; "above" is dead from here on
  mov               rows4d, 8
  lea             stride3q, [strideq*3]
.copy_rows:
  ; row 0
  mova [dstq               ], m0
  mova [dstq            +16], m1
  mova [dstq            +32], m2
  mova [dstq            +48], m3
  ; row 1
  mova [dstq+strideq*2     ], m0
  mova [dstq+strideq*2  +16], m1
  mova [dstq+strideq*2  +32], m2
  mova [dstq+strideq*2  +48], m3
  ; row 2
  mova [dstq+strideq*4     ], m0
  mova [dstq+strideq*4  +16], m1
  mova [dstq+strideq*4  +32], m2
  mova [dstq+strideq*4  +48], m3
  ; row 3
  mova [dstq+stride3q*2    ], m0
  mova [dstq+stride3q*2 +16], m1
  mova [dstq+stride3q*2 +32], m2
  mova [dstq+stride3q*2 +48], m3
  lea                 dstq, [dstq+strideq*8]    ; step down four rows
  dec               rows4d
  jnz .copy_rows
  REP_RET
260