1
// Tests shadow memory correctness for 16-byte/32-byte/etc. vector
// loads/stores.  Requires vector_copy() and VECTOR_BYTES to be
// defined before this point.
5
6 #ifndef VECTOR_BYTES
7 #error "VECTOR_BYTES must be defined"
8 #endif
9
10 #include <assert.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include "tests/malloc.h"
15 #include "memcheck/memcheck.h"
16
17 // What we're actually testing
18 // .. is vector_copy, which should be defined before this point
19
20 // All the sizes here are in *bytes*, not bits.
21
/* Unsigned integer aliases; the digit in each name is the intended
   size in bytes. */
typedef unsigned char      U1;
typedef unsigned short     U2;
typedef unsigned int       U4;
typedef unsigned long long U8;

/* Unsigned integer that pointers are cast to when their low bits are
   inspected. */
typedef unsigned long      UWord;

/* One-byte boolean and its two values. */
typedef unsigned char Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* Empty asm statement that clobbers "cc" and "memory". */
#define CFENCE __asm__ __volatile__("":::"cc","memory")
33
get_endianness(void)34 static __attribute__((noinline)) const char* get_endianness ( void )
35 {
36 volatile U4 w32 = 0x88776655;
37 volatile U1* p = (U1*)&w32;
38 if (p[0] == 0x55) {
39 assert(p[3] == 0x88);
40 return "little";
41 }
42 if (p[0] == 0x88) {
43 assert(p[3] == 0x55);
44 return "big";
45 }
46 assert(0);
47 }
48
randomU4(void)49 static inline U4 randomU4 ( void )
50 {
51 static U4 n = 0;
52 /* From "Numerical Recipes in C" 2nd Edition */
53 n = 1664525UL * n + 1013904223UL;
54 return n;
55 }
56
randomU1(void)57 static inline U1 randomU1 ( void )
58 {
59 return 0xFF & (randomU4() >> 13);
60 }
61
/* Size, in bytes, of the buffer that main() fills and copies around. */
#define N_BYTES 80000
/* Number of random vector copies main() performs: two per buffer byte. */
#define N_EVENTS (2 * N_BYTES)
64
65 // Return x, but with its definedness bits set to be its own value bits
self_shadow(U1 x)66 static inline U1 self_shadow ( U1 x )
67 {
68 U1 res = 0xFF;
69 (void) VALGRIND_MAKE_MEM_UNDEFINED(&res, 1);
70 res &= x;
71 return res;
72 }
73
// Return the V (definedness) bits of x as an ordinary byte.  The output
// byte starts as 0, so 0 is returned if the request writes nothing.
// NOTE(review): the accepted statuses 0 and 1 presumably mean "not
// running under Valgrind" and "success" -- confirm against memcheck.h.
static inline U1 get_shadow ( U1 x )
{
   U1 vbits = 0;
   const U4 status = VALGRIND_GET_VBITS(&x, &vbits, 1);
   assert(status == 0 || status == 1);
   return vbits;
}
81
// Return a copy of x whose byte has been marked as defined.
static inline U1 make_def ( U1 x )
{
   U1 copy = x;
   (void) VALGRIND_MAKE_MEM_DEFINED(&copy, 1);
   return copy;
}
88
// Return a copy of x whose byte has been marked as undefined.
static inline U1 make_undef ( U1 x )
{
   U1 copy = x;
   (void) VALGRIND_MAKE_MEM_UNDEFINED(&copy, 1);
   return copy;
}
95
// Mark the single byte at dst as NOACCESS.
static void make_noaccess ( U1* dst )
{
   U1* const target = dst;
   (void) VALGRIND_MAKE_MEM_NOACCESS(target, 1);
}
100
/* Call fn(arg1, arg2) from one of 32 textually separate call sites,
   selected by the low five bits of arg1.  Every site performs the same
   call, bracketed by CFENCE on both sides.
   NOTE(review): presumably the separate sites exist so that calls made
   for different arg1 values do not share a call site -- confirm. */
#define APPLY_CALL_SITE(sel) \
   case sel: CFENCE; fn(arg1, arg2); CFENCE; break

static void apply ( void(*fn)(U4,Bool), U4 arg1, Bool arg2 )
{
   switch (arg1 & (32-1)) {
      APPLY_CALL_SITE( 0);
      APPLY_CALL_SITE( 1);
      APPLY_CALL_SITE( 2);
      APPLY_CALL_SITE( 3);
      APPLY_CALL_SITE( 4);
      APPLY_CALL_SITE( 5);
      APPLY_CALL_SITE( 6);
      APPLY_CALL_SITE( 7);
      APPLY_CALL_SITE( 8);
      APPLY_CALL_SITE( 9);
      APPLY_CALL_SITE(10);
      APPLY_CALL_SITE(11);
      APPLY_CALL_SITE(12);
      APPLY_CALL_SITE(13);
      APPLY_CALL_SITE(14);
      APPLY_CALL_SITE(15);
      APPLY_CALL_SITE(16);
      APPLY_CALL_SITE(17);
      APPLY_CALL_SITE(18);
      APPLY_CALL_SITE(19);
      APPLY_CALL_SITE(20);
      APPLY_CALL_SITE(21);
      APPLY_CALL_SITE(22);
      APPLY_CALL_SITE(23);
      APPLY_CALL_SITE(24);
      APPLY_CALL_SITE(25);
      APPLY_CALL_SITE(26);
      APPLY_CALL_SITE(27);
      APPLY_CALL_SITE(28);
      APPLY_CALL_SITE(29);
      APPLY_CALL_SITE(30);
      APPLY_CALL_SITE(31);
      /* Not reachable with a 5-bit selector; performs the same call. */
      default: CFENCE; fn(arg1, arg2); CFENCE; break;
   }
}

#undef APPLY_CALL_SITE
139
140 // Try doing some partial-loads-ok/not-ok testing.
141 /* Test cases:
142 - load, aligned, all no-access
143 ==> addr err
144 - load, aligned, 1 to VECTOR_BYTES-1 initial bytes accessible,
145 then at least one unaccessible byte,
146 then remaining bytes in any state.
147 ==> if PLO then no error, but returned V bits are undefined
148 for unaccessible bytes
149 else
150 error; and V bits are defined for unaccessible bytes
151
152 All of the above, but non-aligned:
153 -- all return an addressing error
154 */
155
do_partial_load_case(U4 nInitialValid,Bool aligned)156 static void do_partial_load_case ( U4 nInitialValid, Bool aligned )
157 {
158 fprintf(stderr,
159 "------ PL %s case with %u leading acc+def bytes ------\n\n",
160 aligned ? "Aligned" : "Unaligned", nInitialValid);
161
162 void *temp;
163 if (posix_memalign(&temp, VECTOR_BYTES, 64) != 0)
164 abort();
165 U1* block = temp;
166 U4 j;
167 for (j = 0; j < 64; j++) block[j] = 0;
168
169 if (!aligned) block++;
170
171 // Make the block have this pattern:
172 // block[0 .. i-1] accessible and defined
173 // block[i .. VECTOR_BYTES-1] repeating NOACCESS, UNDEF, DEF
174 // hence block[i], at the very least, is always NOACCESS
175 U4 i = nInitialValid;
176 for (j = i; j < VECTOR_BYTES; j++) {
177 switch ((j-i) % 3) {
178 case 0: make_noaccess(&block[j]); break;
179 case 1: block[j] = make_undef(block[j]); break;
180 case 2: /* already acc and def */ break;
181 }
182 }
183
184 // Do the access, possibly generating an error, and show the
185 // resulting V bits
186 U1 dst[VECTOR_BYTES];
187 vector_copy(&dst[0], block);
188
189 U1 dst_vbits[VECTOR_BYTES];
190 U4 r = VALGRIND_GET_VBITS(&dst[0], &dst_vbits[0], VECTOR_BYTES);
191 assert(r == 1 || r == 0);
192
193 fprintf(stderr, "\n");
194 for (j = 0; j < VECTOR_BYTES; j++) {
195 fprintf(stderr, "%c", dst_vbits[j] == 0 ? 'd'
196 : dst_vbits[j] == 0xFF ? 'U' : '?');
197 }
198 fprintf(stderr, "\n\n");
199
200 // Also let's use the resulting value, to check we get an undef
201 // error
202 U1 sum = 0;
203 for (j = 0; j < VECTOR_BYTES; j++)
204 sum ^= dst[j];
205
206 if (sum == 42) {
207 CFENCE; fprintf(stderr, "%s", ""); CFENCE;
208 } else {
209 CFENCE; fprintf(stderr, "%s", ""); CFENCE;
210 }
211
212 fprintf(stderr, "\n");
213
214 if (!aligned) block--;
215 free(block);
216 }
217
main(void)218 int main ( void )
219 {
220 fprintf(stderr, "sh-mem-vec%d: config: %s-endian, %d-bit word size\n",
221 VECTOR_BYTES * 8, get_endianness(), (int)(8 * sizeof(void*)));
222
223 U4 i;
224 void *temp;
225 if (posix_memalign(&temp, VECTOR_BYTES, N_BYTES) != 0)
226 abort();
227 U1* buf = temp;
228
229 // Fill |buf| with bytes, so that zero bits have a zero shadow
230 // (are defined) and one bits have a one shadow (are undefined)
231 for (i = 0; i < N_BYTES/2; i++) {
232 buf[i] = self_shadow( (i & (1<<5)) ? 0x00 : 0xFF );
233 }
234 for ( ; i < N_BYTES; i++) {
235 buf[i] = self_shadow( randomU1() );
236 }
237
238 // Randomly copy the data around. Once every 8 srcs/dsts, force
239 // the src or dst to be aligned. Once every 64, force both to be
240 // aligned. So as to give the fast (aligned) paths some checking.
241 const U4 n_copies = N_EVENTS;
242 U4 n_d_aligned = 0;
243 U4 n_s_aligned = 0;
244 U4 n_both_aligned = 0;
245 U4 n_fails = 0;
246
247 for (i = 0; i < n_copies; i++) {
248 U4 si = randomU4() % (N_BYTES-VECTOR_BYTES);
249 U4 di = randomU4() % (N_BYTES-VECTOR_BYTES);
250 if (0 == (randomU1() & 7)) si &= ~(VECTOR_BYTES-1);
251 if (0 == (randomU1() & 7)) di &= ~(VECTOR_BYTES-1);
252 if (0 == (randomU1() & 63)) { di &= ~(VECTOR_BYTES-1); si &= ~(VECTOR_BYTES-1); }
253
254 void* dst = &buf[di];
255 void* src = &buf[si];
256
257 if (0 == (((UWord)src) & (VECTOR_BYTES-1))) n_s_aligned++;
258 if (0 == (((UWord)dst) & (VECTOR_BYTES-1))) n_d_aligned++;
259 if (0 == (((UWord)src) & (VECTOR_BYTES-1)) && 0 == (((UWord)dst) & (VECTOR_BYTES-1)))
260 n_both_aligned++;
261
262 vector_copy(dst, src);
263 }
264
265 U4 freq[256];
266 for (i = 0; i < 256; i++)
267 freq[i] = 0;
268
269 for (i = 0; i < N_BYTES; i++) {
270 //if (i > 0 && 0 == (i & 0x0F)) fprintf(stderr, "\n");
271 U1 v_actual = make_def(buf[i]);
272 U1 v_shadow = get_shadow(buf[i]);
273 if (v_actual != v_shadow) n_fails++;
274 //fprintf(stderr, "%02x:%02x ", (U4)v_actual, (U4)v_shadow);
275 freq[(U4)v_actual]++;
276 }
277
278 fprintf(stderr, "\n");
279 U4 totFreq = 0;
280 for (i = 0; i < 256; i++) {
281 totFreq += freq[i];
282 if (i > 0 && (0 == (i % 16))) fprintf(stderr, "\n");
283 fprintf(stderr, "%5u ", freq[i]);
284 }
285 assert(totFreq == N_BYTES);
286
287 fprintf(stderr, "\n\n");
288 fprintf(stderr, "%u copies, %u d_aligned, %u s_aligned, %u both_aligned\n",
289 n_copies, n_d_aligned, n_s_aligned, n_both_aligned);
290 fprintf(stderr, "%u %s\n", n_fails, n_fails == 0 ? "failures" : "FAILURES");
291
292 // Check that we can detect underruns of the block.
293 fprintf(stderr, "\nExpect 2 x no error\n" );
294 vector_copy( &buf[100], &buf[0] );
295 vector_copy( &buf[0], &buf[100] );
296
297 fprintf(stderr, "\nExpect 2 x error\n\n" );
298 vector_copy( &buf[100], &buf[-1] ); // invalid rd
299 vector_copy( &buf[-1], &buf[100] ); // invalid wr
300
301 // and overruns ..
302 fprintf(stderr, "\nExpect 2 x no error\n" );
303 vector_copy( &buf[200], &buf[N_BYTES-VECTOR_BYTES + 0] );
304 vector_copy( &buf[N_BYTES-VECTOR_BYTES + 0], &buf[200] );
305
306 fprintf(stderr, "\nExpect 2 x error\n\n" );
307 vector_copy( &buf[200], &buf[N_BYTES-VECTOR_BYTES + 1] );
308 vector_copy( &buf[N_BYTES-VECTOR_BYTES + 1], &buf[200] );
309
310 free(buf);
311 fprintf(stderr, "\n");
312
313 for (i = 0; i < VECTOR_BYTES; i++)
314 apply( do_partial_load_case, i, True/*aligned*/ );
315
316 for (i = 0; i < VECTOR_BYTES; i++)
317 apply( do_partial_load_case, i, False/*not aligned*/ );
318
319 return 0;
320 }
321