1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12 #include <stdio.h>
13
14 #include "./vpx_config.h"
15 #include "./vp9_rtcd.h"
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/vp9_blockd.h"
18 #include "vp9/common/vp9_idct.h"
19 #include "vpx_dsp/mips/inv_txfm_dspr2.h"
20 #include "vpx_dsp/txfm_common.h"
21 #include "vpx_ports/mem.h"
22
23 #if HAVE_DSPR2
vp9_iht16x16_256_add_dspr2(const int16_t * input,uint8_t * dest,int pitch,int tx_type)24 void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
25 int pitch, int tx_type) {
26 int i, j;
27 DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
28 int16_t *outptr = out;
29 int16_t temp_out[16];
30 uint32_t pos = 45;
31
32 /* bit positon for extract from acc */
33 __asm__ __volatile__ (
34 "wrdsp %[pos], 1 \n\t"
35 :
36 : [pos] "r" (pos)
37 );
38
39 switch (tx_type) {
40 case DCT_DCT: // DCT in both horizontal and vertical
41 idct16_rows_dspr2(input, outptr, 16);
42 idct16_cols_add_blk_dspr2(out, dest, pitch);
43 break;
44 case ADST_DCT: // ADST in vertical, DCT in horizontal
45 idct16_rows_dspr2(input, outptr, 16);
46
47 outptr = out;
48
49 for (i = 0; i < 16; ++i) {
50 iadst16_dspr2(outptr, temp_out);
51
52 for (j = 0; j < 16; ++j)
53 dest[j * pitch + i] =
54 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
55 + dest[j * pitch + i]);
56 outptr += 16;
57 }
58 break;
59 case DCT_ADST: // DCT in vertical, ADST in horizontal
60 {
61 int16_t temp_in[16 * 16];
62
63 for (i = 0; i < 16; ++i) {
64 /* prefetch row */
65 prefetch_load((const uint8_t *)(input + 16));
66
67 iadst16_dspr2(input, outptr);
68 input += 16;
69 outptr += 16;
70 }
71
72 for (i = 0; i < 16; ++i)
73 for (j = 0; j < 16; ++j)
74 temp_in[j * 16 + i] = out[i * 16 + j];
75
76 idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
77 }
78 break;
79 case ADST_ADST: // ADST in both directions
80 {
81 int16_t temp_in[16];
82
83 for (i = 0; i < 16; ++i) {
84 /* prefetch row */
85 prefetch_load((const uint8_t *)(input + 16));
86
87 iadst16_dspr2(input, outptr);
88 input += 16;
89 outptr += 16;
90 }
91
92 for (i = 0; i < 16; ++i) {
93 for (j = 0; j < 16; ++j)
94 temp_in[j] = out[j * 16 + i];
95 iadst16_dspr2(temp_in, temp_out);
96 for (j = 0; j < 16; ++j)
97 dest[j * pitch + i] =
98 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
99 + dest[j * pitch + i]);
100 }
101 }
102 break;
103 default:
104 printf("vp9_short_iht16x16_add_dspr2 : Invalid tx_type\n");
105 break;
106 }
107 }
108 #endif // #if HAVE_DSPR2
109