• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <assert.h>
4 #include <cutils/memory.h>
5 #include <time.h>
6 
/*
 * All systems must implement or emulate the rdhwr instruction to read
 * the userlocal register. Systems that emulate it also return the count
 * register when accessing register $2, so this should work on most systems.
 */
12 #define USE_RDHWR
13 
14 #ifdef USE_RDHWR
15 #define UNITS "cycles"
16 #define SCALE 2			/* Most CPU's */
17 static inline uint32_t
get_count(void)18 get_count(void)
19 {
20   uint32_t res;
21   asm volatile (".set push; .set mips32r2; rdhwr %[res],$2; .set pop" : [res] "=r" (res) : : "memory");
22   return res;
23 }
24 #else
25 #define UNITS "ns"
26 #define SCALE 1
27 static inline uint32_t
get_count(void)28 get_count(void)
29 {
30   struct timespec now;
31   uint32_t res;
32   clock_gettime(CLOCK_REALTIME, &now);
33   res = (uint32_t)(now.tv_sec * 1000000000LL + now.tv_nsec);
34   // printf ("now=%d.%09d res=%d\n", (int)now.tv_sec, (int)now.tv_nsec, res);
35   return res;
36 }
37 #endif
38 
39 uint32_t overhead;
40 void
measure_overhead(void)41 measure_overhead(void)
42 {
43   int i;
44   uint32_t start, stop, delta;
45   for (i = 0; i < 32; i++) {
46     start = get_count();
47     stop = get_count();
48     delta = stop - start;
49     if (overhead == 0 || delta < overhead)
50       overhead = delta;
51   }
52   printf("overhead is %d"UNITS"\n", overhead);
53 }
54 
55 uint32_t
timeone(void (* fn)(),void * d,uint32_t val,uint32_t bytes)56 timeone(void (*fn)(), void *d, uint32_t val, uint32_t bytes)
57 {
58   uint32_t start, stop, delta;
59   start = get_count();
60   (*fn)(d, val, bytes);
61   stop = get_count();
62   delta = stop - start - overhead;
63   // printf ("start=0x%08x stop=0x%08x delta=0x%08x\n", start, stop, delta);
64   return delta * SCALE;
65 }
66 
67 /* define VERIFY to check that memset only touches the bytes it's supposed to */
68 /*#define VERIFY*/
69 
70 /*
71  * Using a big arena means that memset will most likely miss in the cache
72  * NB Enabling verification effectively warms up the cache...
73  */
#define ARENASIZE 0x1000000

/* With VERIFY, eight extra bytes leave room for the guard words. */
#ifdef VERIFY
#define ARENA_GUARD_BYTES 8
#else
#define ARENA_GUARD_BYTES 0
#endif

/* Buffer that every timed fill writes into. */
char arena[ARENASIZE + ARENA_GUARD_BYTES];
80 
81 void
testone(char * tag,void (* fn)(),int trials,int minbytes,int maxbytes,int size,int threshold)82 testone(char *tag, void (*fn)(), int trials, int minbytes, int maxbytes, int size, int threshold)
83 {
84   int offset;
85   void *d;
86   void *p;
87   uint32_t v, notv = 0;
88   uint32_t n;
89   int i, units;
90   int totalunits = 0, totalbytes = 0, samples = 0;
91 
92   /* Reset RNG to ensure each test uses same random values */
93   srand(0);			/* FIXME should be able to use some other seed than 0 */
94 
95   for (i = 0; i < trials; i++) {
96     n = minbytes + (rand() % (maxbytes-minbytes));	/* How many bytes to do */
97     offset = ((rand() % (ARENASIZE-n)));		/* Where to start */
98 
99 #ifdef VERIFY
100     offset += 4;		/* Allow space for guard word at beginning */
101 #endif
102     v = rand();
103 
104     /* Adjust alignment and sizes based on transfer size */
105     switch (size) {
106     case 1:
107       v &= 0xff;
108       notv = ~v & 0xff;
109       break;
110     case 2:
111       v &= 0xffff;
112       notv = ~v & 0xffff;
113       offset &= ~1;
114       n &= ~1;
115       break;
116     case 4:
117       notv = ~v;
118       offset &= ~3;
119       n &= ~3;
120       break;
121     }
122 
123     d = &arena[offset];
124 
125 #ifdef VERIFY
126     /* Initialise the area and guard words */
127     for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) {
128       if (size == 1)
129 	*(uint8_t *)p = notv;
130       else if (size == 2)
131 	*(uint16_t *)p = notv;
132       else if (size == 4)
133 	*(uint32_t *)p = notv;
134     }
135 #endif
136     units = timeone(fn, d, v, n);
137 #ifdef VERIFY
138     /* Check the area and guard words */
139     for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) {
140       uint32_t got = 0;
141       if (size == 1)
142 	got = *(uint8_t *)p;
143       else if (size == 2)
144 	got = *(uint16_t *)p;
145       else if (size == 4)
146 	got = *(uint32_t *)p;
147       if (p < (void *)&arena[offset]) {
148 	if (got != notv)
149 	  printf ("%s: verify failure: preguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, got, n);
150       }
151       else if (p < (void *)&arena[offset+n]) {
152 	if (got != v)
153 	  printf ("%s: verify failure: arena:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n);
154       }
155       else {
156 	if (got != notv)
157 	  printf ("%s: verify failure: postguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n);
158       }
159     }
160 #endif
161 
162     /* If the cycle count looks reasonable include it in the statistics */
163     if (units < threshold) {
164       totalbytes += n;
165       totalunits += units;
166       samples++;
167     }
168   }
169 
170   printf("%s: samples=%d avglen=%d avg" UNITS "=%d bp"UNITS"=%g\n",
171 	 tag, samples, totalbytes/samples, totalunits/samples, (double)totalbytes/(double)totalunits);
172 }
173 
/*
 * Fill routines under test that are not defined in this file.
 * main() passes the same arena pointer, value and ARENASIZE to each of them
 * and times them through testone().
 *
 * NOTE(review): the 16-bit variants declare dst as uint32_t *; confirm this
 * matches their definitions.
 */

/* 32-bit and 16-bit "dumb" variants */
extern void android_memset32_dumb(uint32_t *dst, uint32_t value, size_t size);
extern void android_memset16_dumb(uint32_t *dst, uint16_t value, size_t size);

/* 32-bit and 16-bit "test" variants */
extern void android_memset32_test(uint32_t *dst, uint32_t value, size_t size);
extern void android_memset16_test(uint32_t *dst, uint16_t value, size_t size);

/* byte-wise memset variants */
extern void memset_cmips(void *dst, int value, size_t size);
extern void memset_omips(void *dst, int value, size_t size);
180 
181 int
main(int argc,char ** argv)182 main(int argc, char **argv)
183 {
184   int i;
185   struct {
186     char *type;
187     int trials;
188     int minbytes, maxbytes;
189   } *pp, params[] = {
190     {"small",  10000,   0,   64},
191     {"medium", 10000,  64,  512},
192     {"large",  10000, 512, 1280},
193     {"varied", 10000,   0, 1280},
194   };
195 #define NPARAMS (sizeof(params)/sizeof(params[0]))
196   struct {
197     char *name;
198     void (*fn)();
199     int size;
200   } *fp, functions[] = {
201     {"dmemset16", (void (*)())android_memset16_dumb, 2},
202     {"tmemset16", (void (*)())android_memset16_test, 2},
203     {"lmemset16", (void (*)())android_memset16,      2},
204 
205     {"dmemset32", (void (*)())android_memset32_dumb, 4},
206     {"tmemset32", (void (*)())android_memset32_test, 4},
207     {"lmemset32", (void (*)())android_memset32,      4},
208 
209     {"cmemset",    (void (*)())memset_cmips,         1},
210     {"omemset",    (void (*)())memset_omips,         1},
211     {"lmemset",    (void (*)())memset,               1},
212   };
213 #define NFUNCTIONS (sizeof(functions)/sizeof(functions[0]))
214   char tag[40];
215   int threshold;
216 
217   measure_overhead();
218 
219   /* Warm up the page cache */
220   memset(arena, 0xff, ARENASIZE); /* use 0xff now to avoid COW later */
221 
222   for (fp = functions; fp < &functions[NFUNCTIONS]; fp++) {
223     (fp->fn)(arena, 0xffffffff, ARENASIZE);	/* one call to get the code into Icache */
224     for (pp = params; pp < &params[NPARAMS]; pp++) {
225       sprintf(tag, "%10s: %7s %4d-%4d", fp->name, pp->type, pp->minbytes, pp->maxbytes);
226 
227       /* Set the cycle threshold */
228       threshold = pp->maxbytes * 4 * 10;	/* reasonable for cycles and ns */
229       testone(tag, fp->fn, pp->trials, pp->minbytes, pp->maxbytes, fp->size, threshold);
230     }
231     printf ("\n");
232   }
233 
234   return 0;
235 }
236