/*

Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics
Written by Christophe Lyon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/
25
26 #if defined(__arm__) || defined(__aarch64__)
27 #include <arm_neon.h>
28 #else
29 #include "stm-arm-neon.h"
30 #endif
31
32 #include "stm-arm-neon-ref.h"
33
exec_vtbX(void)34 void exec_vtbX (void)
35 {
36 int i;
37
38 /* In this case, input variables are arrays of vectors */
39 #define DECL_VTBX(T1, W, N, X) \
40 VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(table_vector, T1, W, N, X)
41
42 /* The vtbl1 variant is different from vtbl{2,3,4} because it takes a
43 vector as 1st param, instead of an array of vectors */
44 #define TEST_VTBL1(T1, T2, T3, W, N) \
45 VECT_VAR(table_vector, T1, W, N) = \
46 vld1##_##T2##W((T1##W##_t *)lookup_table); \
47 \
48 VECT_VAR(vector_res, T1, W, N) = \
49 vtbl1_##T2##W(VECT_VAR(table_vector, T1, W, N), \
50 VECT_VAR(vector, T3, W, N)); \
51 vst1_##T2##W(VECT_VAR(result, T1, W, N), \
52 VECT_VAR(vector_res, T1, W, N));
53
54 #define TEST_VTBLX(T1, T2, T3, W, N, X) \
55 VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \
56 vld##X##_##T2##W((T1##W##_t *)lookup_table); \
57 \
58 VECT_VAR(vector_res, T1, W, N) = \
59 vtbl##X##_##T2##W(VECT_ARRAY_VAR(table_vector, T1, W, N, X), \
60 VECT_VAR(vector, T3, W, N)); \
61 vst1_##T2##W(VECT_VAR(result, T1, W, N), \
62 VECT_VAR(vector_res, T1, W, N));
63
64 /* With ARM RVCT, we need to declare variables before any executable
65 statement */
66
67 /* We need to define a lookup table */
68 uint8_t lookup_table[32];
69
70 DECL_VARIABLE(vector, int, 8, 8);
71 DECL_VARIABLE(vector, uint, 8, 8);
72 DECL_VARIABLE(vector, poly, 8, 8);
73 DECL_VARIABLE(vector_res, int, 8, 8);
74 DECL_VARIABLE(vector_res, uint, 8, 8);
75 DECL_VARIABLE(vector_res, poly, 8, 8);
76
77 /* For vtbl1 */
78 DECL_VARIABLE(table_vector, int, 8, 8);
79 DECL_VARIABLE(table_vector, uint, 8, 8);
80 DECL_VARIABLE(table_vector, poly, 8, 8);
81
82 /* For vtbx* */
83 DECL_VARIABLE(default_vector, int, 8, 8);
84 DECL_VARIABLE(default_vector, uint, 8, 8);
85 DECL_VARIABLE(default_vector, poly, 8, 8);
86
87 /* We need only 8 bits variants */
88 #define DECL_ALL_VTBLX(X) \
89 DECL_VTBX(int, 8, 8, X); \
90 DECL_VTBX(uint, 8, 8, X); \
91 DECL_VTBX(poly, 8, 8, X)
92
93 #define TEST_ALL_VTBL1() \
94 TEST_VTBL1(int, s, int, 8, 8); \
95 TEST_VTBL1(uint, u, uint, 8, 8); \
96 TEST_VTBL1(poly, p, uint, 8, 8)
97
98 #define TEST_ALL_VTBLX(X) \
99 TEST_VTBLX(int, s, int, 8, 8, X); \
100 TEST_VTBLX(uint, u, uint, 8, 8, X); \
101 TEST_VTBLX(poly, p, uint, 8, 8, X)
102
103 /* Declare the temporary buffers / variables */
104 DECL_ALL_VTBLX(2);
105 DECL_ALL_VTBLX(3);
106 DECL_ALL_VTBLX(4);
107
108 /* Fill the lookup table */
109 for (i=0; i<32; i++) {
110 lookup_table[i] = i-15;
111 }
112
113 /* Choose init value arbitrarily, will be used as table index */
114 VDUP(vector, , int, s, 8, 8, 1);
115 VDUP(vector, , uint, u, 8, 8, 2);
116 VDUP(vector, , poly, p, 8, 8, 2);
117
118 /* To ensure code coverage of lib, add some indexes larger than 8,16 and 32 */
119 /* except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9) */
120 TEST_VSET_LANE(vector, , int, s, 8, 8, 0, 10);
121 TEST_VSET_LANE(vector, , int, s, 8, 8, 4, 20);
122 TEST_VSET_LANE(vector, , int, s, 8, 8, 5, 40);
123 TEST_VSET_LANE(vector, , uint, u, 8, 8, 0, 10);
124 TEST_VSET_LANE(vector, , uint, u, 8, 8, 4, 20);
125 TEST_VSET_LANE(vector, , uint, u, 8, 8, 5, 40);
126 TEST_VSET_LANE(vector, , poly, p, 8, 8, 0, 10);
127 TEST_VSET_LANE(vector, , poly, p, 8, 8, 4, 20);
128 TEST_VSET_LANE(vector, , poly, p, 8, 8, 5, 40);
129
130
131 /* Check vtbl1 */
132 clean_results ();
133 #define TEST_MSG "VTBL1"
134 TEST_ALL_VTBL1();
135 dump_results_hex (TEST_MSG);
136
137 /* Check vtbl2 */
138 clean_results ();
139 #undef TEST_MSG
140 #define TEST_MSG "VTBL2"
141 TEST_ALL_VTBLX(2);
142 dump_results_hex (TEST_MSG);
143
144 /* Check vtbl3 */
145 clean_results ();
146 #undef TEST_MSG
147 #define TEST_MSG "VTBL3"
148 TEST_ALL_VTBLX(3);
149 dump_results_hex (TEST_MSG);
150
151 /* Check vtbl4 */
152 clean_results ();
153 #undef TEST_MSG
154 #define TEST_MSG "VTBL4"
155 TEST_ALL_VTBLX(4);
156 dump_results_hex (TEST_MSG);
157
158
159 /* Now test VTBX */
160
161 /* The vtbx1 variant is different from vtbx{2,3,4} because it takes a
162 vector as 1st param, instead of an array of vectors */
163 #define TEST_VTBX1(T1, T2, T3, W, N) \
164 VECT_VAR(table_vector, T1, W, N) = \
165 vld1##_##T2##W((T1##W##_t *)lookup_table); \
166 \
167 VECT_VAR(vector_res, T1, W, N) = \
168 vtbx1_##T2##W(VECT_VAR(default_vector, T1, W, N), \
169 VECT_VAR(table_vector, T1, W, N), \
170 VECT_VAR(vector, T3, W, N)); \
171 vst1_##T2##W(VECT_VAR(result, T1, W, N), \
172 VECT_VAR(vector_res, T1, W, N));
173
174 #define TEST_VTBXX(T1, T2, T3, W, N, X) \
175 VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \
176 vld##X##_##T2##W((T1##W##_t *)lookup_table); \
177 \
178 VECT_VAR(vector_res, T1, W, N) = \
179 vtbx##X##_##T2##W(VECT_VAR(default_vector, T1, W, N), \
180 VECT_ARRAY_VAR(table_vector, T1, W, N, X), \
181 VECT_VAR(vector, T3, W, N)); \
182 vst1_##T2##W(VECT_VAR(result, T1, W, N), \
183 VECT_VAR(vector_res, T1, W, N));
184
185 #define TEST_ALL_VTBX1() \
186 TEST_VTBX1(int, s, int, 8, 8); \
187 TEST_VTBX1(uint, u, uint, 8, 8); \
188 TEST_VTBX1(poly, p, uint, 8, 8)
189
190 #define TEST_ALL_VTBXX(X) \
191 TEST_VTBXX(int, s, int, 8, 8, X); \
192 TEST_VTBXX(uint, u, uint, 8, 8, X); \
193 TEST_VTBXX(poly, p, uint, 8, 8, X)
194
195 /* Choose init value arbitrarily, will be used as default value */
196 VDUP(default_vector, , int, s, 8, 8, 0x33);
197 VDUP(default_vector, , uint, u, 8, 8, 0xCC);
198 VDUP(default_vector, , poly, p, 8, 8, 0xCC);
199
200 /* Check vtbx1 */
201 clean_results ();
202 #undef TEST_MSG
203 #define TEST_MSG "VTBX1"
204 TEST_ALL_VTBX1();
205 dump_results_hex (TEST_MSG);
206
207 /* Check vtbx2 */
208 clean_results ();
209 #undef TEST_MSG
210 #define TEST_MSG "VTBX2"
211 TEST_ALL_VTBXX(2);
212 dump_results_hex (TEST_MSG);
213
214 /* Check vtbx3 */
215 clean_results ();
216 #undef TEST_MSG
217 #define TEST_MSG "VTBX3"
218 TEST_ALL_VTBXX(3);
219 dump_results_hex (TEST_MSG);
220
221 /* Check vtbx4 */
222 clean_results ();
223 #undef TEST_MSG
224 #define TEST_MSG "VTBX4"
225 TEST_ALL_VTBXX(4);
226 dump_results_hex (TEST_MSG);
227 }
228