1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12
13 #include "vp9/common/vp9_enums.h"
14 #include "vpx_dsp/mips/inv_txfm_msa.h"
15
vp9_iht8x8_64_add_msa(const int16_t * input,uint8_t * dst,int32_t dst_stride,int32_t tx_type)16 void vp9_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
17 int32_t dst_stride, int32_t tx_type) {
18 v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
19
20 /* load vector elements of 8x8 block */
21 LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
22
23 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
24 in4, in5, in6, in7);
25
26 switch (tx_type) {
27 case DCT_DCT:
28 /* DCT in horizontal */
29 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
30 in4, in5, in6, in7);
31 /* DCT in vertical */
32 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
33 in3, in4, in5, in6, in7);
34 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
35 in4, in5, in6, in7);
36 break;
37 case ADST_DCT:
38 /* DCT in horizontal */
39 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
40 in4, in5, in6, in7);
41 /* ADST in vertical */
42 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
43 in3, in4, in5, in6, in7);
44 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
45 in5, in6, in7);
46 break;
47 case DCT_ADST:
48 /* ADST in horizontal */
49 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
50 in5, in6, in7);
51 /* DCT in vertical */
52 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
53 in3, in4, in5, in6, in7);
54 VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
55 in4, in5, in6, in7);
56 break;
57 case ADST_ADST:
58 /* ADST in horizontal */
59 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
60 in5, in6, in7);
61 /* ADST in vertical */
62 TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
63 in3, in4, in5, in6, in7);
64 VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
65 in5, in6, in7);
66 break;
67 default: assert(0); break;
68 }
69
70 /* final rounding (add 2^4, divide by 2^5) and shift */
71 SRARI_H4_SH(in0, in1, in2, in3, 5);
72 SRARI_H4_SH(in4, in5, in6, in7, 5);
73
74 /* add block and store 8x8 */
75 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
76 dst += (4 * dst_stride);
77 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
78 }
79