• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ** Copyright 2010 The Android Open Source Project
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 **     http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16 
17 /*
18  * Micro-benchmarking of sleep/cpu speed/memcpy/memset/memory reads.
19  */
20 
#include <ctype.h>
#include <math.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <time.h>
#include <unistd.h>
29 
// The default size of data that will be manipulated in each iteration of
// a memory benchmark. Can be modified with the --data_size option.
#define DEFAULT_DATA_SIZE       1000000000

// Number of nanoseconds in a second.
#define NS_PER_SEC              1000000000

// The maximum number of arguments that a benchmark will accept.
#define MAX_ARGS    2

// Use macros to compute values to try and avoid disturbing memory as much
// as possible after each iteration.
//
// Every statement-like macro is wrapped in do { } while (0) so that it
// behaves as exactly one statement, even under an unbraced if/else.

// Store in avg_kb the throughput, in units of 1024 bytes per second,
// achieved by processing `bytes` bytes in `time_ns` nanoseconds.
#define COMPUTE_AVERAGE_KB(avg_kb, bytes, time_ns) \
    do { \
        (avg_kb) = ((bytes) / 1024.0) / ((double)(time_ns) / NS_PER_SEC); \
    } while (0)

// Fold the sample `avg` into the running mean (running_avg) and the running
// mean of squares (square_avg). cur_idx is the zero-based index of the
// sample, i.e. the number of samples folded in before this one.
#define COMPUTE_RUNNING(avg, running_avg, square_avg, cur_idx) \
    do { \
        (running_avg) = ((running_avg) / ((cur_idx) + 1)) * (cur_idx) + (avg) / ((cur_idx) + 1); \
        (square_avg) = ((square_avg) / ((cur_idx) + 1)) * (cur_idx) + ((avg) / ((cur_idx) + 1)) * (avg); \
    } while (0)

// Standard deviation computed from the running mean and the running mean of
// squares. Floating point rounding can make the difference slightly
// negative, so it is clamped to zero to avoid returning NaN.
#define GET_STD_DEV(running_avg, square_avg) \
    sqrt(fmax((square_avg) - (running_avg) * (running_avg), 0.0))
51 
52 // Contains information about benchmark options.
53 typedef struct {
54     bool print_average;
55     bool print_each_iter;
56 
57     int dst_align;
58     int src_align;
59 
60     int cpu_to_lock;
61 
62     int data_size;
63 
64     int args[MAX_ARGS];
65     int num_args;
66 } command_data_t;
67 
68 // Struct that contains a mapping of benchmark name to benchmark function.
69 typedef struct {
70     const char *name;
71     int (*ptr)(const command_data_t &cmd_data);
72 } function_t;
73 
74 // Get the current time in nanoseconds.
nanoTime()75 uint64_t nanoTime() {
76   struct timespec t;
77 
78   t.tv_sec = t.tv_nsec = 0;
79   clock_gettime(CLOCK_MONOTONIC, &t);
80   return static_cast<uint64_t>(t.tv_sec) * NS_PER_SEC + t.tv_nsec;
81 }
82 
// Allocate memory with a specific alignment and return that pointer.
// This function assumes an alignment value that is a power of 2.
// If the alignment is 0 (or negative), then use the pointer returned by
// malloc.
//
// size:      number of usable bytes required.
// alignment: requested alignment in bytes.
// Returns NULL if malloc fails.
//
// When an alignment is requested the returned pointer is offset from the
// start of the malloc'ed region, so it must not be passed to free().
uint8_t *allocateAlignedMemory(size_t size, int alignment) {
  // Do all of the arithmetic in unsigned, pointer-sized types: 2 * alignment
  // computed as an int overflows for large alignments, and a negative
  // alignment would otherwise shrink the allocation.
  const size_t align = (alignment > 0) ? static_cast<size_t>(alignment) : 0;

  // Rounding up can advance the pointer by up to align bytes and setting the
  // alignment bit by up to align more, hence the 2 * align of padding.
  uintptr_t ptr = reinterpret_cast<uintptr_t>(malloc(size + 2 * align));
  if (!ptr)
      return NULL;
  if (align > 0) {
      // When setting the alignment, set it to exactly the alignment chosen.
      // The pointer returned will be guaranteed not to be aligned to anything
      // more than that.
      ptr += align - (ptr & (align - 1));
      ptr |= align;
  }

  return reinterpret_cast<uint8_t*>(ptr);
}
100 
benchmarkSleep(const command_data_t & cmd_data)101 int benchmarkSleep(const command_data_t &cmd_data) {
102     uint64_t time_ns;
103 
104     int delay = cmd_data.args[0];
105     int iters = cmd_data.args[1];
106     bool print_each_iter = cmd_data.print_each_iter;
107     bool print_average = cmd_data.print_average;
108     double avg, running_avg = 0.0, square_avg = 0.0;
109     for (int i = 0; iters == -1 || i < iters; i++) {
110         time_ns = nanoTime();
111         sleep(delay);
112         time_ns = nanoTime() - time_ns;
113 
114         avg = (double)time_ns / NS_PER_SEC;
115 
116         if (print_average) {
117             COMPUTE_RUNNING(avg, running_avg, square_avg, i);
118         }
119 
120         if (print_each_iter) {
121             printf("sleep(%d) took %.06f seconds\n", delay, avg);
122         }
123     }
124 
125     if (print_average) {
126         printf("  sleep(%d) average %.06f seconds std dev %f\n", delay,
127                running_avg, GET_STD_DEV(running_avg, square_avg));
128     }
129 
130     return 0;
131 }
132 
benchmarkCpu(const command_data_t & cmd_data)133 int benchmarkCpu(const command_data_t &cmd_data) {
134     // Use volatile so that the loop is not optimized away by the compiler.
135     volatile int cpu_foo;
136 
137     uint64_t time_ns;
138     int iters = cmd_data.args[1];
139     bool print_each_iter = cmd_data.print_each_iter;
140     bool print_average = cmd_data.print_average;
141     double avg, running_avg = 0.0, square_avg = 0.0;
142     for (int i = 0; iters == -1 || i < iters; i++) {
143         time_ns = nanoTime();
144         for (cpu_foo = 0; cpu_foo < 100000000; cpu_foo++);
145         time_ns = nanoTime() - time_ns;
146 
147         avg = (double)time_ns / NS_PER_SEC;
148 
149         if (print_average) {
150             COMPUTE_RUNNING(avg, running_avg, square_avg, i);
151         }
152 
153         if (print_each_iter) {
154             printf("cpu took %.06f seconds\n", avg);
155         }
156     }
157 
158     if (print_average) {
159         printf("  cpu average %.06f seconds std dev %f\n",
160                running_avg, GET_STD_DEV(running_avg, square_avg));
161     }
162 
163     return 0;
164 }
165 
benchmarkMemset(const command_data_t & cmd_data)166 int benchmarkMemset(const command_data_t &cmd_data) {
167     int size = cmd_data.args[0];
168     int iters = cmd_data.args[1];
169 
170     uint8_t *dst = allocateAlignedMemory(size, cmd_data.dst_align);
171     if (!dst)
172         return -1;
173 
174     double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
175     uint64_t time_ns;
176     int j;
177     bool print_average = cmd_data.print_average;
178     bool print_each_iter = cmd_data.print_each_iter;
179     int copies = cmd_data.data_size/size;
180     for (int i = 0; iters == -1 || i < iters; i++) {
181         time_ns = nanoTime();
182         for (j = 0; j < copies; j++)
183             memset(dst, 0, size);
184         time_ns = nanoTime() - time_ns;
185 
186         // Compute in kb to avoid any overflows.
187         COMPUTE_AVERAGE_KB(avg_kb, copies * size, time_ns);
188 
189         if (print_average) {
190             COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
191         }
192 
193         if (print_each_iter) {
194             printf("memset %dx%d bytes took %.06f seconds (%f MB/s)\n",
195                    copies, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
196         }
197     }
198 
199     if (print_average) {
200         printf("  memset %dx%d bytes average %.2f MB/s std dev %.4f\n",
201                copies, size, running_avg_kb / 1024.0,
202                GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
203     }
204     return 0;
205 }
206 
benchmarkMemcpy(const command_data_t & cmd_data)207 int benchmarkMemcpy(const command_data_t &cmd_data) {
208     int size = cmd_data.args[0];
209     int iters = cmd_data.args[1];
210 
211     uint8_t *src = allocateAlignedMemory(size, cmd_data.src_align);
212     if (!src)
213         return -1;
214     uint8_t *dst = allocateAlignedMemory(size, cmd_data.dst_align);
215     if (!dst)
216         return -1;
217 
218     uint64_t time_ns;
219     double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
220     int j;
221     bool print_average = cmd_data.print_average;
222     bool print_each_iter = cmd_data.print_each_iter;
223     int copies = cmd_data.data_size / size;
224     for (int i = 0; iters == -1 || i < iters; i++) {
225         time_ns = nanoTime();
226         for (j = 0; j < copies; j++)
227             memcpy(dst, src, size);
228         time_ns = nanoTime() - time_ns;
229 
230         // Compute in kb to avoid any overflows.
231         COMPUTE_AVERAGE_KB(avg_kb, copies * size, time_ns);
232 
233         if (print_average) {
234             COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
235         }
236 
237         if (print_each_iter) {
238             printf("memcpy %dx%d bytes took %.06f seconds (%f MB/s)\n",
239                    copies, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
240         }
241     }
242     if (print_average) {
243         printf("  memcpy %dx%d bytes average %.2f MB/s std dev %.4f\n",
244                copies, size, running_avg_kb/1024.0,
245                GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
246     }
247     return 0;
248 }
249 
benchmarkMemread(const command_data_t & cmd_data)250 int benchmarkMemread(const command_data_t &cmd_data) {
251     int size = cmd_data.args[0];
252     int iters = cmd_data.args[1];
253 
254     int *src = reinterpret_cast<int*>(malloc(size));
255     if (!src)
256         return -1;
257 
258     // Use volatile so the compiler does not optimize away the reads.
259     volatile int foo;
260     uint64_t time_ns;
261     int j, k;
262     double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
263     bool print_average = cmd_data.print_average;
264     bool print_each_iter = cmd_data.print_each_iter;
265     int c = cmd_data.data_size / size;
266     for (int i = 0; iters == -1 || i < iters; i++) {
267         time_ns = nanoTime();
268         for (j = 0; j < c; j++)
269             for (k = 0; k < size/4; k++)
270                 foo = src[k];
271         time_ns = nanoTime() - time_ns;
272 
273         // Compute in kb to avoid any overflows.
274         COMPUTE_AVERAGE_KB(avg_kb, c * size, time_ns);
275 
276         if (print_average) {
277             COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
278         }
279 
280         if (print_each_iter) {
281             printf("read %dx%d bytes took %.06f seconds (%f MB/s)\n",
282                    c, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
283         }
284     }
285 
286     if (print_average) {
287         printf("  read %dx%d bytes average %.2f MB/s std dev %.4f\n",
288                c, size, running_avg_kb/1024.0,
289                GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
290     }
291 
292     return 0;
293 }
294 
295 // Create the mapping structure.
296 function_t function_table[] = {
297     { "sleep", benchmarkSleep },
298     { "cpu", benchmarkCpu },
299     { "memset", benchmarkMemset },
300     { "memcpy", benchmarkMemcpy },
301     { "memread", benchmarkMemread },
302     { NULL, NULL }
303 };
304 
// Print the command line help text to stdout.
void usage() {
    // The whole help text is kept as one literal, one source line per output
    // line, and written with a single call.
    static const char help_text[] =
        "Usage:\n"
        "  micro_bench [--data_size DATA_BYTES] [--print_average]\n"
        "              [--no_print_each_iter] [--lock_to_cpu CORE]\n"
        "    --data_size DATA_BYTES\n"
        "      For the data benchmarks (memcpy/memset/memread) the approximate\n"
        "      size of data, in bytes, that will be manipulated in each iteration.\n"
        "    --print_average\n"
        "      Print the average and standard deviation of all iterations.\n"
        "    --no_print_each_iter\n"
        "      Do not print any values in each iteration.\n"
        "    --lock_to_cpu CORE\n"
        "      Lock to the specified CORE. The default is to use the last core found.\n"
        "    ITERS\n"
        "      The number of iterations to execute each benchmark. If not\n"
        "      passed in then run forever.\n"
        "  micro_bench sleep TIME_TO_SLEEP [ITERS]\n"
        "    TIME_TO_SLEEP\n"
        "      The time in seconds to sleep.\n"
        "  micro_bench cpu UNUSED [ITERS]\n"
        "  micro_bench [--dst_align ALIGN] memset NUM_BYTES [ITERS]\n"
        "    --dst_align ALIGN\n"
        "      Align the memset destination pointer to ALIGN. The default is to use the\n"
        "      value returned by malloc.\n"
        "  micro_bench [--src_align ALIGN] [--dst_align ALIGN] memcpy NUM_BYTES [ITERS]\n"
        "    --src_align ALIGN\n"
        "      Align the memcpy source pointer to ALIGN. The default is to use the\n"
        "      value returned by malloc.\n"
        "    --dst_align ALIGN\n"
        "      Align the memcpy destination pointer to ALIGN. The default is to use the\n"
        "      value returned by malloc.\n"
        "  micro_bench memread NUM_BYTES [ITERS]\n";

    printf("%s", help_text);
}
338 
processOptions(int argc,char ** argv,command_data_t * cmd_data)339 function_t *processOptions(int argc, char **argv, command_data_t *cmd_data) {
340     function_t *command = NULL;
341 
342     // Initialize the command_flags.
343     cmd_data->print_average = false;
344     cmd_data->print_each_iter = true;
345     cmd_data->dst_align = 0;
346     cmd_data->src_align = 0;
347     cmd_data->num_args = 0;
348     cmd_data->cpu_to_lock = -1;
349     cmd_data->data_size = DEFAULT_DATA_SIZE;
350     for (int i = 0; i < MAX_ARGS; i++) {
351         cmd_data->args[i] = -1;
352     }
353 
354     for (int i = 1; i < argc; i++) {
355         if (argv[i][0] == '-') {
356             int *save_value = NULL;
357             if (strcmp(argv[i], "--print_average") == 0) {
358               cmd_data->print_average = true;
359             } else if (strcmp(argv[i], "--no_print_each_iter") == 0) {
360               cmd_data->print_each_iter = false;
361             } else if (strcmp(argv[i], "--dst_align") == 0) {
362               save_value = &cmd_data->dst_align;
363             } else if (strcmp(argv[i], "--src_align") == 0) {
364               save_value = &cmd_data->src_align;
365             } else if (strcmp(argv[i], "--lock_to_cpu") == 0) {
366               save_value = &cmd_data->cpu_to_lock;
367             } else if (strcmp(argv[i], "--data_size") == 0) {
368               save_value = &cmd_data->data_size;
369             } else {
370                 printf("Unknown option %s\n", argv[i]);
371                 return NULL;
372             }
373             if (save_value) {
374                 // Checking both characters without a strlen() call should be
375                 // safe since as long as the argument exists, one character will
376                 // be present (\0). And if the first character is '-', then
377                 // there will always be a second character (\0 again).
378                 if (i == argc - 1 || (argv[i + 1][0] == '-' && !isdigit(argv[i + 1][1]))) {
379                     printf("The option %s requires one argument.\n",
380                            argv[i]);
381                     return NULL;
382                 }
383                 *save_value = atoi(argv[++i]);
384             }
385         } else if (!command) {
386             for (function_t *function = function_table; function->name != NULL; function++) {
387                 if (strcmp(argv[i], function->name) == 0) {
388                     command = function;
389                     break;
390                 }
391             }
392             if (!command) {
393                 printf("Uknown command %s\n", argv[i]);
394                 return NULL;
395             }
396         } else if (cmd_data->num_args > MAX_ARGS) {
397             printf("More than %d number arguments passed in.\n", MAX_ARGS);
398             return NULL;
399         } else {
400             cmd_data->args[cmd_data->num_args++] = atoi(argv[i]);
401         }
402     }
403 
404     // Check the arguments passed in make sense.
405     if (cmd_data->num_args != 1 && cmd_data->num_args != 2) {
406         printf("Not enough arguments passed in.\n");
407         return NULL;
408     } else if (cmd_data->dst_align < 0) {
409         printf("The --dst_align option must be greater than or equal to 0.\n");
410         return NULL;
411     } else if (cmd_data->src_align < 0) {
412         printf("The --src_align option must be greater than or equal to 0.\n");
413         return NULL;
414     } else if (cmd_data->data_size <= 0) {
415         printf("The --data_size option must be a positive number.\n");
416         return NULL;
417     } else if ((cmd_data->dst_align & (cmd_data->dst_align - 1))) {
418         printf("The --dst_align option must be a power of 2.\n");
419         return NULL;
420     } else if ((cmd_data->src_align & (cmd_data->src_align - 1))) {
421         printf("The --src_align option must be a power of 2.\n");
422         return NULL;
423     }
424 
425     return command;
426 }
427 
// Raise the priority of the process and restrict it to a single cpu.
//
// cpu_to_lock: the cpu to run on, or a negative value to use the highest
//              numbered cpu in the current affinity mask of the process.
//
// Returns true on success. On failure a message is printed and false is
// returned.
bool raisePriorityAndLock(int cpu_to_lock) {
    cpu_set_t cpuset;

    if (setpriority(PRIO_PROCESS, 0, -20)) {
        // perror() appends ": <error text>" and a newline itself, so the
        // prefix must not end with its own punctuation and newline.
        perror("Unable to raise priority of process");
        return false;
    }

    CPU_ZERO(&cpuset);
    if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_getaffinity failed");
        return false;
    }

    if (cpu_to_lock < 0) {
        // Lock to the last active core we find.
        for (int i = 0; i < CPU_SETSIZE; i++) {
            if (CPU_ISSET(i, &cpuset)) {
                cpu_to_lock = i;
            }
        }
    } else if (cpu_to_lock >= CPU_SETSIZE || !CPU_ISSET(cpu_to_lock, &cpuset)) {
        // The range check comes first: the value is user supplied and
        // CPU_ISSET is not guaranteed to bounds check its index.
        printf("Cpu %d does not exist.\n", cpu_to_lock);
        return false;
    }

    if (cpu_to_lock < 0) {
        printf("Cannot find any valid cpu to lock.\n");
        return false;
    }

    // Replace the affinity mask with one holding only the chosen cpu.
    CPU_ZERO(&cpuset);
    CPU_SET(cpu_to_lock, &cpuset);
    if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_setaffinity failed");
        return false;
    }

    return true;
}
468 
main(int argc,char ** argv)469 int main(int argc, char **argv) {
470     command_data_t cmd_data;
471 
472     function_t *command = processOptions(argc, argv, &cmd_data);
473     if (!command) {
474       usage();
475       return -1;
476     }
477 
478     if (!raisePriorityAndLock(cmd_data.cpu_to_lock)) {
479       return -1;
480     }
481 
482     printf("%s\n", command->name);
483     return (*command->ptr)(cmd_data);
484 }
485