/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
11 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h"
12 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
13
14 // MIPS optimization of the function WebRtcIsacfix_MatrixProduct1.
15 // Bit-exact with the function WebRtcIsacfix_MatrixProduct1C from
16 // entropy_coding.c file.
WebRtcIsacfix_MatrixProduct1MIPS(const int16_t matrix0[],const int32_t matrix1[],int32_t matrix_product[],const int matrix1_index_factor1,const int matrix0_index_factor1,const int matrix1_index_init_case,const int matrix1_index_step,const int matrix0_index_step,const int inner_loop_count,const int mid_loop_count,const int shift)17 void WebRtcIsacfix_MatrixProduct1MIPS(const int16_t matrix0[],
18 const int32_t matrix1[],
19 int32_t matrix_product[],
20 const int matrix1_index_factor1,
21 const int matrix0_index_factor1,
22 const int matrix1_index_init_case,
23 const int matrix1_index_step,
24 const int matrix0_index_step,
25 const int inner_loop_count,
26 const int mid_loop_count,
27 const int shift) {
28 if (matrix1_index_init_case != 0) {
29 int j = SUBFRAMES, k = 0, n = 0;
30 int32_t r0, r1, r2, sum32;
31 int32_t* product_start = matrix_product;
32 int32_t* product_ptr;
33 const uint32_t product_step = 4 * mid_loop_count;
34 const uint32_t matrix0_step = 2 * matrix0_index_step;
35 const uint32_t matrix1_step = 4 * matrix1_index_step;
36 const uint32_t matrix0_step2 = 2 * matrix0_index_factor1;
37 const uint32_t matrix1_step2 = 4 * matrix1_index_factor1;
38 const int16_t* matrix0_start = matrix0;
39 const int32_t* matrix1_start = matrix1;
40 int16_t* matrix0_ptr;
41 int32_t* matrix1_ptr;
42
43 __asm __volatile (
44 ".set push \n\t"
45 ".set noreorder \n\t"
46 "1: \n\t"
47 "addu %[product_ptr], %[product_start], $0 \n\t"
48 "addu %[k], %[product_step], $0 \n\t"
49 "addiu %[j], %[j], -1 \n\t"
50 "addu %[matrix1_start], %[matrix1], $0 \n\t"
51 "2: \n\t"
52 "addu %[matrix1_ptr], %[matrix1_start], $0 \n\t"
53 "addu %[matrix0_ptr], %[matrix0_start], $0 \n\t"
54 "addu %[n], %[inner_loop_count], $0 \n\t"
55 "mul %[sum32], $0, $0 \n\t"
56 "3: \n\t"
57 "lw %[r0], 0(%[matrix1_ptr]) \n\t"
58 "lh %[r1], 0(%[matrix0_ptr]) \n\t"
59 "addu %[matrix1_ptr], %[matrix1_ptr], %[matrix1_step] \n\t"
60 "sllv %[r0], %[r0], %[shift] \n\t"
61 "andi %[r2], %[r0], 0xffff \n\t"
62 "sra %[r2], %[r2], 1 \n\t"
63 "mul %[r2], %[r2], %[r1] \n\t"
64 "sra %[r0], %[r0], 16 \n\t"
65 "mul %[r0], %[r0], %[r1] \n\t"
66 "addu %[matrix0_ptr], %[matrix0_ptr], %[matrix0_step] \n\t"
67 "addiu %[n], %[n], -1 \n\t"
68 #if defined(MIPS_DSP_R1_LE)
69 "shra_r.w %[r2], %[r2], 15 \n\t"
70 #else
71 "addiu %[r2], %[r2], 0x4000 \n\t"
72 "sra %[r2], %[r2], 15 \n\t"
73 #endif
74 "addu %[sum32], %[sum32], %[r2] \n\t"
75 "bgtz %[n], 3b \n\t"
76 " addu %[sum32], %[sum32], %[r0] \n\t"
77 "addiu %[k], %[k], -4 \n\t"
78 "addu %[matrix1_start], %[matrix1_start], %[matrix1_step2] \n\t"
79 "sw %[sum32], 0(%[product_ptr]) \n\t"
80 "bgtz %[k], 2b \n\t"
81 " addiu %[product_ptr], %[product_ptr], 4 \n\t"
82 "addu %[matrix0_start], %[matrix0_start], %[matrix0_step2] \n\t"
83 "bgtz %[j], 1b \n\t"
84 " addu %[product_start], %[product_start], %[product_step] \n\t"
85 ".set pop \n\t"
86 : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start),
87 [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "=&r"(matrix1_start),
88 [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
89 [matrix0_start] "+r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0),
90 [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2)
91 : [product_step] "r" (product_step), [matrix1] "r" (matrix1),
92 [inner_loop_count] "r" (inner_loop_count),
93 [matrix1_step] "r" (matrix1_step), [shift] "r" (shift),
94 [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2),
95 [matrix0_step2] "r" (matrix0_step2)
96 : "hi", "lo", "memory"
97 );
98 } else {
99 int j = SUBFRAMES, k = 0, n = 0;
100 int32_t r0, r1, r2, sum32;
101 int32_t* product_start = matrix_product;
102 int32_t* product_ptr;
103 const uint32_t product_step = 4 * mid_loop_count;
104 const uint32_t matrix0_step = 2 * matrix0_index_step;
105 const uint32_t matrix1_step = 4 * matrix1_index_step;
106 const uint32_t matrix0_step2 = 2 * matrix0_index_factor1;
107 const uint32_t matrix1_step2 = 4 * matrix1_index_factor1;
108 const int16_t* matrix0_start = matrix0;
109 const int32_t* matrix1_start = matrix1;
110 int16_t* matrix0_ptr;
111 int32_t* matrix1_ptr;
112
113 __asm __volatile (
114 ".set push \n\t"
115 ".set noreorder \n\t"
116 "1: \n\t"
117 "addu %[product_ptr], %[product_start], $0 \n\t"
118 "addu %[k], %[product_step], $0 \n\t"
119 "addiu %[j], %[j], -1 \n\t"
120 "addu %[matrix0_start], %[matrix0], $0 \n\t"
121 "2: \n\t"
122 "addu %[matrix1_ptr], %[matrix1_start], $0 \n\t"
123 "addu %[matrix0_ptr], %[matrix0_start], $0 \n\t"
124 "addu %[n], %[inner_loop_count], $0 \n\t"
125 "mul %[sum32], $0, $0 \n\t"
126 "3: \n\t"
127 "lw %[r0], 0(%[matrix1_ptr]) \n\t"
128 "lh %[r1], 0(%[matrix0_ptr]) \n\t"
129 "addu %[matrix1_ptr], %[matrix1_ptr], %[matrix1_step] \n\t"
130 "sllv %[r0], %[r0], %[shift] \n\t"
131 "andi %[r2], %[r0], 0xffff \n\t"
132 "sra %[r2], %[r2], 1 \n\t"
133 "mul %[r2], %[r2], %[r1] \n\t"
134 "sra %[r0], %[r0], 16 \n\t"
135 "mul %[r0], %[r0], %[r1] \n\t"
136 "addu %[matrix0_ptr], %[matrix0_ptr], %[matrix0_step] \n\t"
137 "addiu %[n], %[n], -1 \n\t"
138 #if defined(MIPS_DSP_R1_LE)
139 "shra_r.w %[r2], %[r2], 15 \n\t"
140 #else
141 "addiu %[r2], %[r2], 0x4000 \n\t"
142 "sra %[r2], %[r2], 15 \n\t"
143 #endif
144 "addu %[sum32], %[sum32], %[r2] \n\t"
145 "bgtz %[n], 3b \n\t"
146 " addu %[sum32], %[sum32], %[r0] \n\t"
147 "addiu %[k], %[k], -4 \n\t"
148 "addu %[matrix0_start], %[matrix0_start], %[matrix0_step2] \n\t"
149 "sw %[sum32], 0(%[product_ptr]) \n\t"
150 "bgtz %[k], 2b \n\t"
151 " addiu %[product_ptr], %[product_ptr], 4 \n\t"
152 "addu %[matrix1_start], %[matrix1_start], %[matrix1_step2] \n\t"
153 "bgtz %[j], 1b \n\t"
154 " addu %[product_start], %[product_start], %[product_step] \n\t"
155 ".set pop \n\t"
156 : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start),
157 [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "+r"(matrix1_start),
158 [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
159 [matrix0_start] "=&r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0),
160 [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2)
161 : [product_step] "r" (product_step), [matrix0] "r" (matrix0),
162 [inner_loop_count] "r" (inner_loop_count),
163 [matrix1_step] "r" (matrix1_step), [shift] "r" (shift),
164 [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2),
165 [matrix0_step2] "r" (matrix0_step2)
166 : "hi", "lo", "memory"
167 );
168 }
169 }
170
171 // MIPS optimization of the function WebRtcIsacfix_MatrixProduct2.
172 // Bit-exact with the function WebRtcIsacfix_MatrixProduct2C from
173 // entropy_coding.c file.
WebRtcIsacfix_MatrixProduct2MIPS(const int16_t matrix0[],const int32_t matrix1[],int32_t matrix_product[],const int matrix0_index_factor,const int matrix0_index_step)174 void WebRtcIsacfix_MatrixProduct2MIPS(const int16_t matrix0[],
175 const int32_t matrix1[],
176 int32_t matrix_product[],
177 const int matrix0_index_factor,
178 const int matrix0_index_step) {
179 int j = 0, n = 0;
180 int loop_count = SUBFRAMES;
181 const int16_t* matrix0_ptr;
182 const int32_t* matrix1_ptr;
183 const int16_t* matrix0_start = matrix0;
184 const int matrix0_step = 2 * matrix0_index_step;
185 const int matrix0_step2 = 2 * matrix0_index_factor;
186 int32_t r0, r1, r2, r3, r4, sum32, sum32_2;
187
188 __asm __volatile (
189 ".set push \n\t"
190 ".set noreorder \n\t"
191 "addu %[j], %[loop_count], $0 \n\t"
192 "addu %[matrix0_start], %[matrix0], $0 \n\t"
193 "1: \n\t"
194 "addu %[matrix1_ptr], %[matrix1], $0 \n\t"
195 "addu %[matrix0_ptr], %[matrix0_start], $0 \n\t"
196 "addu %[n], %[loop_count], $0 \n\t"
197 "mul %[sum32], $0, $0 \n\t"
198 "mul %[sum32_2], $0, $0 \n\t"
199 "2: \n\t"
200 "lw %[r0], 0(%[matrix1_ptr]) \n\t"
201 "lw %[r1], 4(%[matrix1_ptr]) \n\t"
202 "lh %[r2], 0(%[matrix0_ptr]) \n\t"
203 "andi %[r3], %[r0], 0xffff \n\t"
204 "sra %[r3], %[r3], 1 \n\t"
205 "mul %[r3], %[r3], %[r2] \n\t"
206 "andi %[r4], %[r1], 0xffff \n\t"
207 "sra %[r4], %[r4], 1 \n\t"
208 "mul %[r4], %[r4], %[r2] \n\t"
209 "sra %[r0], %[r0], 16 \n\t"
210 "mul %[r0], %[r0], %[r2] \n\t"
211 "sra %[r1], %[r1], 16 \n\t"
212 "mul %[r1], %[r1], %[r2] \n\t"
213 #if defined(MIPS_DSP_R1_LE)
214 "shra_r.w %[r3], %[r3], 15 \n\t"
215 "shra_r.w %[r4], %[r4], 15 \n\t"
216 #else
217 "addiu %[r3], %[r3], 0x4000 \n\t"
218 "sra %[r3], %[r3], 15 \n\t"
219 "addiu %[r4], %[r4], 0x4000 \n\t"
220 "sra %[r4], %[r4], 15 \n\t"
221 #endif
222 "addiu %[matrix1_ptr], %[matrix1_ptr], 8 \n\t"
223 "addu %[matrix0_ptr], %[matrix0_ptr], %[matrix0_step] \n\t"
224 "addiu %[n], %[n], -1 \n\t"
225 "addu %[sum32], %[sum32], %[r3] \n\t"
226 "addu %[sum32_2], %[sum32_2], %[r4] \n\t"
227 "addu %[sum32], %[sum32], %[r0] \n\t"
228 "bgtz %[n], 2b \n\t"
229 " addu %[sum32_2], %[sum32_2], %[r1] \n\t"
230 "sra %[sum32], %[sum32], 3 \n\t"
231 "sra %[sum32_2], %[sum32_2], 3 \n\t"
232 "addiu %[j], %[j], -1 \n\t"
233 "addu %[matrix0_start], %[matrix0_start], %[matrix0_step2] \n\t"
234 "sw %[sum32], 0(%[matrix_product]) \n\t"
235 "sw %[sum32_2], 4(%[matrix_product]) \n\t"
236 "bgtz %[j], 1b \n\t"
237 " addiu %[matrix_product], %[matrix_product], 8 \n\t"
238 ".set pop \n\t"
239 : [j] "=&r" (j), [matrix0_start] "=&r" (matrix0_start),
240 [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
241 [n] "=&r" (n), [sum32] "=&r" (sum32), [sum32_2] "=&r" (sum32_2),
242 [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
243 [r4] "=&r" (r4), [matrix_product] "+r" (matrix_product)
244 : [loop_count] "r" (loop_count), [matrix0] "r" (matrix0),
245 [matrix1] "r" (matrix1), [matrix0_step] "r" (matrix0_step),
246 [matrix0_step2] "r" (matrix0_step2)
247 : "hi", "lo", "memory"
248 );
249 }
250