;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;

%define private_prefix av1

%include "third_party/x86inc/x86inc.asm"

SECTION .text

;-----------------------------------------------------------------------------
; TRANSFORM_COLS
;
; One 1-D pass of the 4-point transform, applied to four lanes in parallel.
;   In:    m0 = a1, m1 = b1, m2 = c1, m3 = d1 (16-bit lanes, low 64 bits)
;   Out:   m0 = a1, m1 = c1, m2 = d1, m3 = b1 (after the SWAP renames below)
;   Clobb: m4, m5 (scratch)
;
; Per-lane arithmetic, in C-like form:
;   a1 += b1;  d1 -= c1;  e1 = (a1 - d1) >> 1;
;   b1 = e1 - b1;  c1 = e1 - c1;  a1 -= c1;  d1 += b1;
;
; The copies use movq, which moves only the low 64 bits, so only the four low
; word lanes of each register carry meaningful results.
; SWAP only renames the mN aliases for the code that follows; it does not
; move any data.
;-----------------------------------------------------------------------------
%macro TRANSFORM_COLS 0
  paddw           m0,        m1      ; a1 += b1
  movq            m4,        m0      ; m4 = a1 (low 4 lanes)
  psubw           m3,        m2      ; d1 -= c1
  psubw           m4,        m3      ; m4 = a1 - d1
  psraw           m4,        1       ; e1 = (a1 - d1) >> 1 (arithmetic shift)
  movq            m5,        m4      ; m5 = e1
  psubw           m5,        m1      ; b1 = e1 - b1
  psubw           m4,        m2      ; c1 = e1 - c1
  psubw           m0,        m4      ; a1 -= c1
  paddw           m3,        m5      ; d1 += b1
                                     ; m0 a1
  SWAP            1,         4       ; m1 c1
  SWAP            2,         3       ; m2 d1
  SWAP            3,         5       ; m3 b1
%endmacro

;-----------------------------------------------------------------------------
; TRANSPOSE_4X4
;
; Transposes a 4x4 block of 16-bit values.
;   In:    m0..m3 = rows 0..3, four words each in the low 64 bits
;   Out:   m0 = column 0 (low 64 bits) | column 1 (high 64 bits)
;          m1 = column 2 (low 64 bits) | column 3 (high 64 bits)
;   Clobb: m2 (left holding the interleave of rows 2 and 3); m3 is unchanged
;-----------------------------------------------------------------------------
%macro TRANSPOSE_4X4 0
                                     ; 00 01 02 03
                                     ; 10 11 12 13
                                     ; 20 21 22 23
                                     ; 30 31 32 33
  punpcklwd       m0,        m1      ; 00 10 01 11  02 12 03 13
  punpcklwd       m2,        m3      ; 20 30 21 31  22 32 23 33
  mova            m1,        m0
  punpckldq       m0,        m2      ; 00 10 20 30  01 11 21 31
  punpckhdq       m1,        m2      ; 02 12 22 32  03 13 23 33
%endmacro

;-----------------------------------------------------------------------------
; fwht4x4(input, output, stride)
;
; Forward 4x4 transform (WHT, going by the function name): two passes of
; TRANSFORM_COLS with a transpose after each, then a left shift by 2 and
; widening of the 16 results to 32 bits.
; With private_prefix/INIT_XMM above, x86inc's cglobal exports this as
; av1_fwht4x4_sse2.
;
;   In:    input  = pointer to 16-bit samples; 4 rows of 4 are read
;          stride = distance between rows in 16-bit elements
;                   (rows are addressed at input + row * stride * 2 bytes)
;          output = pointer to 64 bytes; written with aligned stores (mova),
;                   so it must be 16-byte aligned
;   Out:   output[0..15] = 32-bit sign-extended coefficients
;   Clobb: r3, m0-m5 (only six XMM registers are used, so none of the
;          Win64 callee-saved xmm6+ registers need to be spilled)
;
; NOTE(review): the C prototype is not visible in this file. stride is
; sign-extended from its low 32 bits on the assumption that it is declared
; as a 32-bit int, in which case the upper half of the register is
; unspecified on x86-64. movsxdifnidn emits nothing on x86-32. Confirm
; against the declaration.
;-----------------------------------------------------------------------------
INIT_XMM sse2
cglobal fwht4x4, 3, 4, 6, input, output, stride
  movsxdifnidn    strideq,   strided
  lea             r3q,       [inputq + strideq*4]   ; r3 = address of row 2
  movq            m0,        [inputq]               ; a1 = row 0
  movq            m1,        [inputq + strideq*2]   ; b1 = row 1
  movq            m2,        [r3q]                  ; c1 = row 2
  movq            m3,        [r3q + strideq*2]      ; d1 = row 3

  ; First pass, then regroup the result so each of m0..m3 again holds one
  ; 4-word vector in its low 64 bits.
  TRANSFORM_COLS
  TRANSPOSE_4X4                      ; m0 = col0|col1, m1 = col2|col3
  SWAP            1,         2       ; m2 = col2|col3, m1 is free
  psrldq          m1,        m0, 8   ; m1 = col1 (x86inc emulates 3-operand form)
  psrldq          m3,        m2, 8   ; m3 = col3

  ; Second pass. The upper 64 bits of m0/m2 still hold stale data, which is
  ; harmless because the transpose consumes only the low halves.
  TRANSFORM_COLS
  TRANSPOSE_4X4

  psllw           m0,        2       ; scale results by 4
  psllw           m1,        2

  ; Sign extension: duplicate each word into both halves of a dword, then
  ; arithmetic-shift right by 16 to leave the sign-extended 32-bit value.
  mova            m2,             m0
  mova            m3,             m1
  punpcklwd       m0,             m0
  punpcklwd       m1,             m1
  punpckhwd       m2,             m2
  punpckhwd       m3,             m3
  psrad           m0,             16
  psrad           m1,             16
  psrad           m2,             16
  psrad           m3,             16
  mova            [outputq],      m0 ; coefficients  0..3
  mova            [outputq + 16], m2 ; coefficients  4..7
  mova            [outputq + 32], m1 ; coefficients  8..11
  mova            [outputq + 48], m3 ; coefficients 12..15

  RET