/* (removed: web code-listing navigation residue — "Home / Line# / Scopes /
   Navigate / Raw / Download" — left over from the page this file was
   extracted from; not part of the program) */
1 // This program is a thorough test of the LOADVn/STOREVn shadow memory
2 // operations.
3 
4 #include <assert.h>
5 #include <stdlib.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include "tests/sys_mman.h"
9 #include "memcheck/memcheck.h"
10 
11 // All the sizes here are in *bytes*, not bits.
12 
13 typedef unsigned char        U1;
14 typedef unsigned short       U2;
15 typedef unsigned int         U4;
16 typedef unsigned long long   U8;
17 
18 typedef float                F4;
19 typedef double               F8;
20 
21 typedef unsigned long        UWord;
22 
23 #define PAGE_SIZE 4096ULL
24 
25 
26 // XXX: should check the error cases for SET/GET_VBITS also
27 
28 // For the byte 'x', build a value of 'size' bytes from that byte, eg:
29 //   size 1 --> x
30 //   size 2 --> xx
31 //   size 4 --> xxxx
32 //   size 8 --> xxxxxxxx
33 // where the 0 bits are seen by Memcheck as defined, and the 1 bits are
34 // seen as undefined (ie. the value of each bit matches its V bit, ie. the
35 // resulting value is the same as its metavalue).
36 //
build(int size,U1 byte)37 U8 build(int size, U1 byte)
38 {
39    int i;
40    U8 mask = 0;
41    U8 shres;
42    U8 res = 0xffffffffffffffffULL, res2;
43    (void)VALGRIND_MAKE_MEM_UNDEFINED(&res, 8);
44    assert(1 == size || 2 == size || 4 == size || 8 == size);
45 
46    for (i = 0; i < size; i++) {
47       mask <<= 8;
48       mask |= (U8)byte;
49    }
50 
51    res &= mask;
52 
53    // res is now considered partially defined, but we know exactly what its
54    // value is (it happens to be the same as its metavalue).
55 
56    (void)VALGRIND_GET_VBITS(&res, &shres, 8);
57    res2 = res;
58    (void)VALGRIND_MAKE_MEM_DEFINED(&res2, 8);  // avoid the 'undefined' warning
59    assert(res2 == shres);
60    return res;
61 }
62 
// Return the value of 'x' with its shadow metadata forced to "fully
// defined", so the caller can inspect/print it without tripping
// Memcheck's undefined-value checks.  'volatile' stops the compiler
// from optimising the copy away.
U1 make_defined ( U1 x )
{
   volatile U1 copy = x;
   (void)VALGRIND_MAKE_MEM_DEFINED(&copy, 1);
   return copy;
}
69 
// Verify that every byte of arr[0..n-1] is still bit-for-bit identical to
// its shadow V bits (the invariant established by build()).  On the first
// mismatch, print a failure report tagged with 'who' and exit(1).  On
// success, print a checksum of the bytes so corrupted values change the
// expected output.
void check(U1* arr, int n, char* who)
{
   int i;
   U1* shadow = malloc(n);
   U1 arr_i;
   U8 sum = 0;
   assert(shadow);   // fix: malloc result was previously used unchecked
   (void)VALGRIND_GET_VBITS(arr, shadow, n);
   for (i = 0; i < n; i++) {
      // Sanitize the byte before comparing/printing: arr[i] is partially
      // undefined by construction, and touching it raw would itself
      // provoke a Memcheck error.
      arr_i = make_defined(arr[i]);
      if (arr_i != shadow[i]) {
          // fix: print arr_i (the defined copy) rather than raw arr[i],
          // so the error path does not trigger an undefined-value warning.
          fprintf(stderr, "\n\nFAILURE: %s, byte %d -- "
                          "is 0x%x, should be 0x%x\n\n",
                          who, i, shadow[i], arr_i);
          exit(1);
      }
      sum += (U8)arr_i;
   }
   free(shadow);
   printf("test passed, sum = %llu (%9.5f per byte)\n",
          sum, (F8)sum / (F8)n);
}
91 
randomU4(void)92 static inline U4 randomU4 ( void )
93 {
94    static U4 n = 0;
95    /* From "Numerical Recipes in C" 2nd Edition */
96    n = 1664525UL * n + 1013904223UL;
97    return n;
98 }
99 
randomU1(void)100 static inline U1 randomU1 ( void )
101 {
102    return 0xFF & (randomU4() >> 13);
103 }
104 
105 // NB!  300000 is really not enough to shake out all failures.
106 // Increasing it by a factor of 256 is, but makes the test take
107 // the best part of an hour.
108 #define N_BYTES  (300000 /* * 256 */)
109 #define N_EVENTS (5 * N_BYTES)
110 
111 
// Core of the test: exercise Memcheck's LOADVn/STOREVn shadow-memory
// routines on the N_BYTES buffer 'arr'.  The buffer is filled so each
// byte's value equals its V bits; N_EVENTS random intra-buffer copies of
// width 1/2/4/8 bytes (integer and FP) are then performed, and the
// value/metadata correspondence is re-verified at the end.
void do_test_at ( U1* arr )
{
   int i;

   // Per-width move counters, reported at the end of the run.
   U4 mv1 = 0, mv2 = 0, mv4 = 0, mv8 = 0, mv4f = 0, mv8f = 0;

   /* Fill arr with random bytes whose shadows match them. */
   if (0) printf("-------- arr = %p\n", arr);   // debug aid, disabled

   printf("initialising\n");
   for (i = 0; i < N_BYTES; i++)
      arr[i] = (U1)build(1, randomU1());

   printf("post-initialisation check\n");
   check(arr, N_BYTES, "after initialisation");

   /* Now do huge numbers of memory copies. */
   printf("doing copies\n");
   for (i = 0; i < N_EVENTS; i++) {
      U4 ty, src, dst;
      // Pick a copy width: 0=U1 1=U2 2=U4 3=U8 4=F8.
      ty  = (randomU4() >> 13) % 5;
     tryagain:
      // Pick source/destination offsets; multi-byte cases below re-roll
      // (goto tryagain) if the access would run past the buffer end.
      src = (randomU4() >>  1) % N_BYTES;
      dst = (randomU4() >>  3) % N_BYTES;
      switch (ty) {
         case 0: { // U1
            *(U1*)(arr+dst) = *(U1*)(arr+src);
	    mv1++;
            break;
         }
         case 1: { // U2
            if (src+2 >= N_BYTES || dst+2 >= N_BYTES)
               goto tryagain;
            *(U2*)(arr+dst) = *(U2*)(arr+src);
	    mv2++;
            break;
         }
         case 2: { // U4
            if (src+4 >= N_BYTES || dst+4 >= N_BYTES)
               goto tryagain;
            *(U4*)(arr+dst) = *(U4*)(arr+src);
	    mv4++;
            break;
         }
         case 3: { // U8
            if (src+8 >= N_BYTES || dst+8 >= N_BYTES)
               goto tryagain;
            *(U8*)(arr+dst) = *(U8*)(arr+src);
	    mv8++;
            break;
         }
         /* Don't bother with 32-bit floats.  These cause
            horrible complications, as discussed in sh-mem.c. */
         /*
         case 4: { // F4
            if (src+4 >= N_BYTES || dst+4 >= N_BYTES)
               goto tryagain;
            *(F4*)(arr+dst) = *(F4*)(arr+src);
	    mv4f++;
            break;
         }
         */
         case 4: { // F8
            if (src+8 >= N_BYTES || dst+8 >= N_BYTES)
               goto tryagain;
#if defined(__i386__)
	    /* Copying via an x87 register causes the test to fail,
               because (I think) some obscure values that are FP
               denormals get changed during the copy due to the FPU
               normalising, or rounding, or whatever, them.  This
               causes them to no longer bit-for-bit match the
               accompanying metadata.  Yet we still need to do a
               genuine 8-byte load/store to test the relevant memcheck
               {LOADV8,STOREV8} routines.  Hence use the MMX registers
               instead, as copying through them should be
               straightforward.. */
            __asm__ __volatile__(
               "movq (%1), %%mm2\n\t"
               "movq %%mm2, (%0)\n\t"
               "emms"
               : : "r"(arr+dst), "r"(arr+src) : "memory"
            );
#elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__)
            /* On arm32, many compilers generate a 64-bit float move
               using two 32 bit integer registers, which completely
               defeats this test.  Hence force a 64-bit NEON load and
               store.  I guess this will break the build on non-NEON
               capable targets. */
            __asm__ __volatile__ (
               "vld1.64 {d7},[%0] ; vst1.64 {d7},[%1] "
               : : "r"(arr+src), "r"(arr+dst) : "d7","memory"
            );
#else
            /* Straightforward.  On amd64, this gives a load/store of
               the bottom half of an xmm register.  On ppc32/64 this
               is a straighforward load/store of an FP register. */
            *(F8*)(arr+dst) = *(F8*)(arr+src);
#endif
	    mv8f++;
            break;
         }
         default:
            // Unreachable: ty is always in 0..4.
	   fprintf(stderr, "sh-mem-random: bad size\n");
	   exit(0);
      }
   }

   printf("final check\n");
   check(arr, N_BYTES, "final check");

   printf("counts 1/2/4/8/F4/F8: %d %d %d %d %d %d\n",
          mv1, mv2, mv4, mv8, mv4f, mv8f);
}
225 
226 
227 
main(void)228 int main(void)
229 {
230    U1* arr;
231 
232    if (0 == RUNNING_ON_VALGRIND) {
233       fprintf(stderr, "error: this program only works when run under Valgrind\n");
234       exit(1);
235    }
236 
237    printf("-------- testing non-auxmap range --------\n");
238 
239    arr = malloc(N_BYTES);
240    assert(arr);
241    do_test_at(arr);
242    free(arr);
243 
244    if (sizeof(void*) == 8) {
245       // 64-bit platform.
246       int tries;
247       int nbytes_p;
248       // (U1*)(UWord)constULL funny casting to keep gcc quiet on
249       // 32-bit platforms
250       U1* huge_addr = (U1*)(UWord)0x6600000000ULL;  // 408GB
251       // Note, kernel 2.6.? on Athlon64 refuses fixed mmap requests
252       // at above 512GB.
253 
254       printf("-------- testing auxmap range --------\n");
255 
256       nbytes_p = (N_BYTES + PAGE_SIZE) & ~(PAGE_SIZE-1);
257 
258       for (tries = 0; tries < 10; tries++) {
259          arr = mmap(huge_addr, nbytes_p, PROT_READ|PROT_WRITE,
260                     MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
261 	 if (arr != MAP_FAILED)
262             break;
263 	 // hmm. fudge the address and try again.
264          huge_addr += (randomU4() & ~(PAGE_SIZE-1));
265       }
266 
267       if (tries >= 10) {
268 	   fprintf(stderr, "sh-mem-random: can't mmap hi-mem\n");
269 	   exit(0);
270       }
271       assert(arr != MAP_FAILED);
272 
273       do_test_at(arr);
274    }
275 
276    return 0;
277 
278 }
279