1 /*
2 ** Copyright 2010 The Android Open Source Project
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 ** http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16
17 /*
18 * Micro-benchmarking of sleep/cpu speed/memcpy/memset/memory reads.
19 */
20
#include <ctype.h>
#include <math.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <time.h>
#include <unistd.h>
29
// The default size of data that will be manipulated in each iteration of
// a memory benchmark. Can be modified with the --data_size option.
#define DEFAULT_DATA_SIZE 1000000000

// Number of nanoseconds in a second.
#define NS_PER_SEC 1000000000

// The maximum number of arguments that a benchmark will accept.
#define MAX_ARGS 2

// Use macros to compute values to try and avoid disturbing memory as much
// as possible after each iteration.

// Store in avg_kb the throughput, in kilobytes (1024 bytes) per second,
// of processing `bytes` bytes in `time_ns` nanoseconds.
// The expansion is a single statement without a trailing semicolon, so the
// macro can be used like a function call, including as the body of an
// unbraced if/else.
#define COMPUTE_AVERAGE_KB(avg_kb, bytes, time_ns) \
    do { \
        (avg_kb) = ((bytes) / 1024.0) / ((double)(time_ns) / NS_PER_SEC); \
    } while (0)
44
// Fold sample number cur_idx (0-based) with value avg into the running mean
// (running_avg) and the running mean of the squares (square_avg).
// Both updates are wrapped in do { } while (0) so the macro expands to one
// statement; otherwise only the first assignment would be controlled by an
// unbraced if/for at the call site.
#define COMPUTE_RUNNING(avg, running_avg, square_avg, cur_idx) \
    do { \
        (running_avg) = ((running_avg) / ((cur_idx) + 1)) * (cur_idx) + (avg) / ((cur_idx) + 1); \
        (square_avg) = ((square_avg) / ((cur_idx) + 1)) * (cur_idx) + ((avg) / ((cur_idx) + 1)) * (avg); \
    } while (0)
48
// Standard deviation computed from the running mean (running_avg) and the
// running mean of the squares (square_avg).
// The difference square_avg - running_avg^2 can come out slightly negative
// because of floating-point rounding (for example when every sample is
// nearly identical); it is clamped to zero so the result is never NaN.
// Note that running_avg is evaluated twice.
#define GET_STD_DEV(running_avg, square_avg) \
    (sqrt(fmax(0.0, (square_avg) - (running_avg) * (running_avg))))
51
// Contains information about benchmark options.
// Filled in by processOptions() from the command line and passed by
// reference to the selected benchmark function.
typedef struct {
    // Print the average and standard deviation once all iterations finish.
    bool print_average;
    // Print the measurement of every iteration as it completes.
    bool print_each_iter;

    // Requested alignment of the destination and source buffers
    // (--dst_align / --src_align). 0 means use the pointer malloc returns.
    int dst_align;
    int src_align;

    // Core to lock the process to (--lock_to_cpu). A negative value makes
    // raisePriorityAndLock() pick the last core found in the affinity mask.
    int cpu_to_lock;

    // Approximate number of bytes the memory benchmarks manipulate in each
    // iteration (--data_size).
    int data_size;

    // Positional numeric arguments that follow the command name; entries
    // that were not supplied stay at -1. The benchmarks read args[0] as the
    // sleep time (sleep) or buffer size (memset/memcpy/memread) and args[1]
    // as the iteration count.
    int args[MAX_ARGS];
    // Number of entries of args that were supplied on the command line.
    int num_args;
} command_data_t;
67
// Struct that contains a mapping of benchmark name to benchmark function.
typedef struct {
    // Command name as typed on the command line.
    const char *name;
    // Benchmark entry point; main() returns its result as the exit status.
    int (*ptr)(const command_data_t &cmd_data);
} function_t;
73
74 // Get the current time in nanoseconds.
nanoTime()75 uint64_t nanoTime() {
76 struct timespec t;
77
78 t.tv_sec = t.tv_nsec = 0;
79 clock_gettime(CLOCK_MONOTONIC, &t);
80 return static_cast<uint64_t>(t.tv_sec) * NS_PER_SEC + t.tv_nsec;
81 }
82
// Allocate a buffer of `size` bytes whose address is aligned to exactly
// `alignment` bytes, and return it.
// `alignment` is assumed to be a power of 2. When it is 0 the pointer
// returned by malloc is handed back untouched.
// For a non-zero alignment the returned pointer is offset from the malloc
// result, so it must not be passed to free().
uint8_t *allocateAlignedMemory(size_t size, int alignment) {
    // Over-allocate by twice the alignment: rounding up consumes at most
    // `alignment` bytes and forcing the alignment bit at most another
    // `alignment` bytes.
    void *raw = malloc(size + 2 * alignment);
    uint64_t addr = reinterpret_cast<uint64_t>(raw);
    if (addr == 0) {
        return NULL;
    }

    if (alignment <= 0) {
        return reinterpret_cast<uint8_t*>(addr);
    }

    // Advance to the next multiple of the alignment...
    const uint64_t low_bits = addr & (alignment - 1);
    addr += alignment - low_bits;
    // ...then make sure the alignment bit itself is set, so the address is
    // guaranteed not to be aligned to anything larger than requested.
    addr |= alignment;

    return reinterpret_cast<uint8_t*>(addr);
}
100
benchmarkSleep(const command_data_t & cmd_data)101 int benchmarkSleep(const command_data_t &cmd_data) {
102 uint64_t time_ns;
103
104 int delay = cmd_data.args[0];
105 int iters = cmd_data.args[1];
106 bool print_each_iter = cmd_data.print_each_iter;
107 bool print_average = cmd_data.print_average;
108 double avg, running_avg = 0.0, square_avg = 0.0;
109 for (int i = 0; iters == -1 || i < iters; i++) {
110 time_ns = nanoTime();
111 sleep(delay);
112 time_ns = nanoTime() - time_ns;
113
114 avg = (double)time_ns / NS_PER_SEC;
115
116 if (print_average) {
117 COMPUTE_RUNNING(avg, running_avg, square_avg, i);
118 }
119
120 if (print_each_iter) {
121 printf("sleep(%d) took %.06f seconds\n", delay, avg);
122 }
123 }
124
125 if (print_average) {
126 printf(" sleep(%d) average %.06f seconds std dev %f\n", delay,
127 running_avg, GET_STD_DEV(running_avg, square_avg));
128 }
129
130 return 0;
131 }
132
benchmarkCpu(const command_data_t & cmd_data)133 int benchmarkCpu(const command_data_t &cmd_data) {
134 // Use volatile so that the loop is not optimized away by the compiler.
135 volatile int cpu_foo;
136
137 uint64_t time_ns;
138 int iters = cmd_data.args[1];
139 bool print_each_iter = cmd_data.print_each_iter;
140 bool print_average = cmd_data.print_average;
141 double avg, running_avg = 0.0, square_avg = 0.0;
142 for (int i = 0; iters == -1 || i < iters; i++) {
143 time_ns = nanoTime();
144 for (cpu_foo = 0; cpu_foo < 100000000; cpu_foo++);
145 time_ns = nanoTime() - time_ns;
146
147 avg = (double)time_ns / NS_PER_SEC;
148
149 if (print_average) {
150 COMPUTE_RUNNING(avg, running_avg, square_avg, i);
151 }
152
153 if (print_each_iter) {
154 printf("cpu took %.06f seconds\n", avg);
155 }
156 }
157
158 if (print_average) {
159 printf(" cpu average %.06f seconds std dev %f\n",
160 running_avg, GET_STD_DEV(running_avg, square_avg));
161 }
162
163 return 0;
164 }
165
benchmarkMemset(const command_data_t & cmd_data)166 int benchmarkMemset(const command_data_t &cmd_data) {
167 int size = cmd_data.args[0];
168 int iters = cmd_data.args[1];
169
170 uint8_t *dst = allocateAlignedMemory(size, cmd_data.dst_align);
171 if (!dst)
172 return -1;
173
174 double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
175 uint64_t time_ns;
176 int j;
177 bool print_average = cmd_data.print_average;
178 bool print_each_iter = cmd_data.print_each_iter;
179 int copies = cmd_data.data_size/size;
180 for (int i = 0; iters == -1 || i < iters; i++) {
181 time_ns = nanoTime();
182 for (j = 0; j < copies; j++)
183 memset(dst, 0, size);
184 time_ns = nanoTime() - time_ns;
185
186 // Compute in kb to avoid any overflows.
187 COMPUTE_AVERAGE_KB(avg_kb, copies * size, time_ns);
188
189 if (print_average) {
190 COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
191 }
192
193 if (print_each_iter) {
194 printf("memset %dx%d bytes took %.06f seconds (%f MB/s)\n",
195 copies, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
196 }
197 }
198
199 if (print_average) {
200 printf(" memset %dx%d bytes average %.2f MB/s std dev %.4f\n",
201 copies, size, running_avg_kb / 1024.0,
202 GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
203 }
204 return 0;
205 }
206
benchmarkMemcpy(const command_data_t & cmd_data)207 int benchmarkMemcpy(const command_data_t &cmd_data) {
208 int size = cmd_data.args[0];
209 int iters = cmd_data.args[1];
210
211 uint8_t *src = allocateAlignedMemory(size, cmd_data.src_align);
212 if (!src)
213 return -1;
214 uint8_t *dst = allocateAlignedMemory(size, cmd_data.dst_align);
215 if (!dst)
216 return -1;
217
218 uint64_t time_ns;
219 double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
220 int j;
221 bool print_average = cmd_data.print_average;
222 bool print_each_iter = cmd_data.print_each_iter;
223 int copies = cmd_data.data_size / size;
224 for (int i = 0; iters == -1 || i < iters; i++) {
225 time_ns = nanoTime();
226 for (j = 0; j < copies; j++)
227 memcpy(dst, src, size);
228 time_ns = nanoTime() - time_ns;
229
230 // Compute in kb to avoid any overflows.
231 COMPUTE_AVERAGE_KB(avg_kb, copies * size, time_ns);
232
233 if (print_average) {
234 COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
235 }
236
237 if (print_each_iter) {
238 printf("memcpy %dx%d bytes took %.06f seconds (%f MB/s)\n",
239 copies, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
240 }
241 }
242 if (print_average) {
243 printf(" memcpy %dx%d bytes average %.2f MB/s std dev %.4f\n",
244 copies, size, running_avg_kb/1024.0,
245 GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
246 }
247 return 0;
248 }
249
benchmarkMemread(const command_data_t & cmd_data)250 int benchmarkMemread(const command_data_t &cmd_data) {
251 int size = cmd_data.args[0];
252 int iters = cmd_data.args[1];
253
254 int *src = reinterpret_cast<int*>(malloc(size));
255 if (!src)
256 return -1;
257
258 // Use volatile so the compiler does not optimize away the reads.
259 volatile int foo;
260 uint64_t time_ns;
261 int j, k;
262 double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0;
263 bool print_average = cmd_data.print_average;
264 bool print_each_iter = cmd_data.print_each_iter;
265 int c = cmd_data.data_size / size;
266 for (int i = 0; iters == -1 || i < iters; i++) {
267 time_ns = nanoTime();
268 for (j = 0; j < c; j++)
269 for (k = 0; k < size/4; k++)
270 foo = src[k];
271 time_ns = nanoTime() - time_ns;
272
273 // Compute in kb to avoid any overflows.
274 COMPUTE_AVERAGE_KB(avg_kb, c * size, time_ns);
275
276 if (print_average) {
277 COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i);
278 }
279
280 if (print_each_iter) {
281 printf("read %dx%d bytes took %.06f seconds (%f MB/s)\n",
282 c, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0);
283 }
284 }
285
286 if (print_average) {
287 printf(" read %dx%d bytes average %.2f MB/s std dev %.4f\n",
288 c, size, running_avg_kb/1024.0,
289 GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0);
290 }
291
292 return 0;
293 }
294
// Create the mapping structure.
// Maps each command name accepted on the command line to the function that
// runs it. processOptions() walks this table until it reaches the entry
// whose name is NULL, so that terminator must stay the last entry.
function_t function_table[] = {
    { "sleep", benchmarkSleep },
    { "cpu", benchmarkCpu },
    { "memset", benchmarkMemset },
    { "memcpy", benchmarkMemcpy },
    { "memread", benchmarkMemread },
    { NULL, NULL }
};
304
// Print the command-line help text to stdout, one table entry per line.
void usage() {
    static const char *const usage_lines[] = {
        "Usage:\n",
        " micro_bench [--data_size DATA_BYTES] [--print_average]\n",
        " [--no_print_each_iter] [--lock_to_cpu CORE]\n",
        " --data_size DATA_BYTES\n",
        " For the data benchmarks (memcpy/memset/memread) the approximate\n",
        " size of data, in bytes, that will be manipulated in each iteration.\n",
        " --print_average\n",
        " Print the average and standard deviation of all iterations.\n",
        " --no_print_each_iter\n",
        " Do not print any values in each iteration.\n",
        " --lock_to_cpu CORE\n",
        " Lock to the specified CORE. The default is to use the last core found.\n",
        " ITERS\n",
        " The number of iterations to execute each benchmark. If not\n",
        " passed in then run forever.\n",
        " micro_bench sleep TIME_TO_SLEEP [ITERS]\n",
        " TIME_TO_SLEEP\n",
        " The time in seconds to sleep.\n",
        " micro_bench cpu UNUSED [ITERS]\n",
        " micro_bench [--dst_align ALIGN] memset NUM_BYTES [ITERS]\n",
        " --dst_align ALIGN\n",
        " Align the memset destination pointer to ALIGN. The default is to use the\n",
        " value returned by malloc.\n",
        " micro_bench [--src_align ALIGN] [--dst_align ALIGN] memcpy NUM_BYTES [ITERS]\n",
        " --src_align ALIGN\n",
        " Align the memcpy source pointer to ALIGN. The default is to use the\n",
        " value returned by malloc.\n",
        " --dst_align ALIGN\n",
        " Align the memcpy destination pointer to ALIGN. The default is to use the\n",
        " value returned by malloc.\n",
        " micro_bench memread NUM_BYTES [ITERS]\n",
    };
    const size_t num_lines = sizeof(usage_lines) / sizeof(usage_lines[0]);

    // None of the lines contain conversion specifiers, so they are written
    // verbatim rather than used as format strings.
    for (size_t line = 0; line < num_lines; line++) {
        fputs(usage_lines[line], stdout);
    }
}
338
processOptions(int argc,char ** argv,command_data_t * cmd_data)339 function_t *processOptions(int argc, char **argv, command_data_t *cmd_data) {
340 function_t *command = NULL;
341
342 // Initialize the command_flags.
343 cmd_data->print_average = false;
344 cmd_data->print_each_iter = true;
345 cmd_data->dst_align = 0;
346 cmd_data->src_align = 0;
347 cmd_data->num_args = 0;
348 cmd_data->cpu_to_lock = -1;
349 cmd_data->data_size = DEFAULT_DATA_SIZE;
350 for (int i = 0; i < MAX_ARGS; i++) {
351 cmd_data->args[i] = -1;
352 }
353
354 for (int i = 1; i < argc; i++) {
355 if (argv[i][0] == '-') {
356 int *save_value = NULL;
357 if (strcmp(argv[i], "--print_average") == 0) {
358 cmd_data->print_average = true;
359 } else if (strcmp(argv[i], "--no_print_each_iter") == 0) {
360 cmd_data->print_each_iter = false;
361 } else if (strcmp(argv[i], "--dst_align") == 0) {
362 save_value = &cmd_data->dst_align;
363 } else if (strcmp(argv[i], "--src_align") == 0) {
364 save_value = &cmd_data->src_align;
365 } else if (strcmp(argv[i], "--lock_to_cpu") == 0) {
366 save_value = &cmd_data->cpu_to_lock;
367 } else if (strcmp(argv[i], "--data_size") == 0) {
368 save_value = &cmd_data->data_size;
369 } else {
370 printf("Unknown option %s\n", argv[i]);
371 return NULL;
372 }
373 if (save_value) {
374 // Checking both characters without a strlen() call should be
375 // safe since as long as the argument exists, one character will
376 // be present (\0). And if the first character is '-', then
377 // there will always be a second character (\0 again).
378 if (i == argc - 1 || (argv[i + 1][0] == '-' && !isdigit(argv[i + 1][1]))) {
379 printf("The option %s requires one argument.\n",
380 argv[i]);
381 return NULL;
382 }
383 *save_value = atoi(argv[++i]);
384 }
385 } else if (!command) {
386 for (function_t *function = function_table; function->name != NULL; function++) {
387 if (strcmp(argv[i], function->name) == 0) {
388 command = function;
389 break;
390 }
391 }
392 if (!command) {
393 printf("Uknown command %s\n", argv[i]);
394 return NULL;
395 }
396 } else if (cmd_data->num_args > MAX_ARGS) {
397 printf("More than %d number arguments passed in.\n", MAX_ARGS);
398 return NULL;
399 } else {
400 cmd_data->args[cmd_data->num_args++] = atoi(argv[i]);
401 }
402 }
403
404 // Check the arguments passed in make sense.
405 if (cmd_data->num_args != 1 && cmd_data->num_args != 2) {
406 printf("Not enough arguments passed in.\n");
407 return NULL;
408 } else if (cmd_data->dst_align < 0) {
409 printf("The --dst_align option must be greater than or equal to 0.\n");
410 return NULL;
411 } else if (cmd_data->src_align < 0) {
412 printf("The --src_align option must be greater than or equal to 0.\n");
413 return NULL;
414 } else if (cmd_data->data_size <= 0) {
415 printf("The --data_size option must be a positive number.\n");
416 return NULL;
417 } else if ((cmd_data->dst_align & (cmd_data->dst_align - 1))) {
418 printf("The --dst_align option must be a power of 2.\n");
419 return NULL;
420 } else if ((cmd_data->src_align & (cmd_data->src_align - 1))) {
421 printf("The --src_align option must be a power of 2.\n");
422 return NULL;
423 }
424
425 return command;
426 }
427
// Raise the process priority to -20 and restrict it to a single core.
// cpu_to_lock selects the core; a negative value picks the highest-numbered
// core present in the current affinity mask.
// Returns true on success. On failure an error is printed and false is
// returned.
bool raisePriorityAndLock(int cpu_to_lock) {
    cpu_set_t cpuset;

    if (setpriority(PRIO_PROCESS, 0, -20)) {
        // perror() appends ": <error description>" and a newline itself, so
        // the prefix must not end with punctuation or a newline.
        perror("Unable to raise priority of process");
        return false;
    }

    CPU_ZERO(&cpuset);
    if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_getaffinity failed");
        return false;
    }

    if (cpu_to_lock < 0) {
        // Lock to the last active core we find: scan from the top and stop
        // at the first core that is set.
        for (int i = CPU_SETSIZE - 1; i >= 0; i--) {
            if (CPU_ISSET(i, &cpuset)) {
                cpu_to_lock = i;
                break;
            }
        }
    } else if (!CPU_ISSET(cpu_to_lock, &cpuset)) {
        printf("Cpu %d does not exist.\n", cpu_to_lock);
        return false;
    }

    // Still negative means the affinity mask had no core set at all.
    if (cpu_to_lock < 0) {
        printf("Cannot find any valid cpu to lock.\n");
        return false;
    }

    CPU_ZERO(&cpuset);
    CPU_SET(cpu_to_lock, &cpuset);
    if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_setaffinity failed");
        return false;
    }

    return true;
}
468
main(int argc,char ** argv)469 int main(int argc, char **argv) {
470 command_data_t cmd_data;
471
472 function_t *command = processOptions(argc, argv, &cmd_data);
473 if (!command) {
474 usage();
475 return -1;
476 }
477
478 if (!raisePriorityAndLock(cmd_data.cpu_to_lock)) {
479 return -1;
480 }
481
482 printf("%s\n", command->name);
483 return (*command->ptr)(cmd_data);
484 }
485