;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;

;
13
14%include "third_party/x86inc/x86inc.asm"
15
16SECTION .text
17
%macro REORDER_INPUTS 0
  ; Pure register renaming (no instructions are emitted).
  ; On entry m0..m3 hold a, c, d, b; on exit they hold a, b, c, d.
  ; Written as the two pairwise swaps that make up the chain 1 -> 3 -> 2.
  SWAP 1, 3
  SWAP 3, 2
%endmacro
22
%macro TRANSFORM_COLS 0
  ; One pass of the 4-point transform over the low four 16-bit lanes.
  ; On entry:  m0 = a, m1 = b, m2 = c, m3 = d
  ; On exit:   m0 = a, m1 = b, m2 = c, m3 = d (updated values)
  ; Scratch:   m4, m5
  psubw           m3,        m1             ; d -= b
  paddw           m0,        m2             ; a += c

  ; e = (a - d) >> 1, with the subtraction carried out on 32-bit lanes.
  ; Unpacking into the upper word of each dword and shifting right by 16
  ; sign-extends the value; the previous contents of m4/m5 are shifted out.
  punpcklwd       m5,        m3
  punpcklwd       m4,        m0
  psrad           m5,        16             ; d as 32-bit
  psrad           m4,        16             ; a as 32-bit
  psubd           m4,        m5
  psrad           m4,        1
  packssdw        m4,        m4             ; e, back to 16-bit (saturating)

  psubw           m5,        m4,        m1  ; new b = e - b
  psubw           m4,        m2             ; new c = e - c
  paddw           m3,        m4             ; d += new c
  psubw           m0,        m5             ; a -= new b

  ; Rename so the results sit in the documented registers.
  SWAP            2,         4              ; m2 c
  SWAP            1,         5              ; m1 b
%endmacro
50
; Transpose a 4x4 matrix of 16-bit values whose rows are held in the low
; halves of m0-m3; the transposed rows end up in the low halves of m0-m3.
%macro TRANSPOSE_4X4 0
  punpcklwd       m1,        m3             ; interleave rows 1 and 3
  punpcklwd       m0,        m2             ; interleave rows 0 and 2
  mova            m2,        m0
  punpckhwd       m2,        m1             ; low half: output row 2, high half: row 3
  punpcklwd       m0,        m1             ; low half: output row 0, high half: row 1
  pshufd          m3,        m2, 0x0e       ; bring the high half of m2 down
  pshufd          m1,        m0, 0x0e       ; bring the high half of m0 down
%endmacro
60
; Transpose a 4x4 int16 matrix stored two rows per register in xmm0 and xmm1;
; the four transposed rows are left in the bottom halves of xmm0-xmm3.
%macro TRANSPOSE_4X4_WIDE 0
  mova            m3,        m0
  punpckhwd       m3,        m1             ; interleave rows 1 and 3
  punpcklwd       m0,        m1             ; interleave rows 0 and 2
  mova            m2,        m0
  punpckhwd       m2,        m3             ; low half: output row 2, high half: row 3
  punpcklwd       m0,        m3             ; low half: output row 0, high half: row 1
  pshufd          m3,        m2, 0x0e       ; bring the high half of m2 down
  pshufd          m1,        m0, 0x0e       ; bring the high half of m0 down
%endmacro
72
; Add two rows of four 16-bit values to two rows of four bytes in memory.
; Arguments are register numbers:
;   %1, %2  rows to add (overwritten with the packed byte results)
;   %3, %4  scratch registers
;   %5      register that must already be all zero
; Rows are read from and written back to [outputq] and [outputq + strideq].
%macro ADD_STORE_4P_2X 5  ; src1, src2, tmp1, tmp2, zero
  ; Both rows are loaded before anything is stored.
  movd            m%3,       [outputq]
  movd            m%4,       [outputq + strideq]

  ; First row: widen bytes to words, add, narrow with unsigned saturation.
  punpcklbw       m%3,       m%5
  paddw           m%1,       m%3
  packuswb        m%1,       m%5

  ; Second row, same steps.
  punpcklbw       m%4,       m%5
  paddw           m%2,       m%4
  packuswb        m%2,       m%5

  movd            [outputq], m%1
  movd            [outputq + strideq], m%2
%endmacro
85
;-----------------------------------------------------------------------
; iwht4x4_16_add(input, output, stride)
;
; Reads 16 32-bit coefficients from input, runs the 4-point transform in
; TRANSFORM_COLS twice with a transpose in between, and adds the 4x4
; result to the 4x4 block of bytes at output, saturating to 0..255.
;
; In:    inputq  = coefficients (64 bytes, read with aligned loads)
;        outputq = top-left byte of the destination block
;        strideq = distance in bytes between destination rows
; Uses:  m0-m6; outputq is advanced by two rows
;-----------------------------------------------------------------------
INIT_XMM sse2
cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride
  ; cglobal does not extend 32-bit arguments. stride is presumably an int in
  ; the C prototype (TODO confirm), so on x86-64 the upper half of the
  ; register is undefined until it is sign-extended. No-op on x86-32.
  movsxdifnidn    strideq,   strided

  ; Load four rows of 32-bit coefficients and narrow them to 16 bits
  ; (signed saturation), two rows per register.
  mova            m0,        [inputq +  0]
  packssdw        m0,        [inputq + 16]
  mova            m1,        [inputq + 32]
  packssdw        m1,        [inputq + 48]
  psraw           m0,        2
  psraw           m1,        2

  ; First pass.
  TRANSPOSE_4X4_WIDE
  REORDER_INPUTS
  TRANSFORM_COLS

  ; Second pass on the transposed result.
  TRANSPOSE_4X4
  REORDER_INPUTS
  TRANSFORM_COLS

  ; Add the result to the destination, two rows at a time.
  pxor            m4,        m4             ; zero, used for byte<->word conversion
  ADD_STORE_4P_2X  0, 1, 5, 6, 4
  lea             outputq,   [outputq + 2 * strideq]
  ADD_STORE_4P_2X  2, 3, 5, 6, 4

  RET
108