1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "bandwidth.h"

#include <ctype.h>
#include <pthread.h>
#include <sched.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>

#include <map>
#include <vector>
28
29
// One entry of a command-line option table.
struct option_t {
    // Option name as typed after the leading "--". A NULL name terminates
    // the table.
    const char *name;
    // true: the option's argument is converted with strtol() and stored as
    // an int; false: the argument string pointer is stored as-is.
    bool int_type;
};
34
35 option_t bandwidth_opts[] = {
36 { "size", true },
37 { "num_warm_loops", true },
38 { "num_loops", true },
39 { "type", false },
40 { NULL, false },
41 };
42
43 option_t per_core_opts[] = {
44 { "size", true },
45 { "num_warm_loops", true},
46 { "num_loops", true },
47 { "type", false },
48 { NULL, false },
49 };
50
51 option_t multithread_opts[] = {
52 { "size", true },
53 { "num_warm_loops", true},
54 { "num_loops", true },
55 { "type", false },
56 { "num_threads", true },
57 { NULL, false },
58 };
59
// Value of a parsed option: either an integer or a borrowed C string,
// depending on the option's int_type flag.
typedef union {
    int int_value;
    const char *char_value;
} arg_value_t;

// Orders option names by their text rather than by pointer value. Without
// this, a std::map keyed on const char* compares addresses, so a lookup such
// as values["size"] only finds the entry when the compiler happens to share
// storage between identical string literals.
struct arg_name_less {
    bool operator()(const char *lhs, const char *rhs) const {
        return strcmp(lhs, rhs) < 0;
    }
};

// Maps an option name to its parsed value. Keys are not copied; they must
// outlive the map (string literals in this file).
typedef std::map<const char*, arg_value_t, arg_name_less> arg_t;
65
processBandwidthOptions(int argc,char ** argv,option_t options[],arg_t * values)66 bool processBandwidthOptions(int argc, char** argv, option_t options[],
67 arg_t *values) {
68 for (int i = 1; i < argc; i++) {
69 if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
70 char *arg = &argv[i][2];
71
72 for (int j = 0; options[j].name != NULL; j++) {
73 if (strcmp(arg, options[j].name) == 0) {
74 const char *name = options[j].name;
75 if (i == argc - 1) {
76 printf("The option --%s requires an argument.\n", name);
77 return false;
78 }
79 if (options[j].int_type) {
80 (*values)[name].int_value = strtol(argv[++i], NULL, 0);
81 } else {
82 (*values)[name].char_value = argv[++i];
83 }
84 }
85 }
86 }
87 }
88
89 return true;
90 }
91
createBandwidthBenchmarkObject(arg_t values)92 BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
93 BandwidthBenchmark *bench = NULL;
94
95 const char *name = values["type"].char_value;
96 size_t size = 0;
97 if (values.count("size") > 0) {
98 size = values["size"].int_value;
99 }
100 if (strcmp(name, "copy_ldrd_strd") == 0) {
101 bench = new CopyLdrdStrdBenchmark();
102 } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
103 bench = new CopyLdmiaStmiaBenchmark();
104 } else if (strcmp(name, "copy_vld1_vst1") == 0) {
105 bench = new CopyVld1Vst1Benchmark();
106 } else if (strcmp(name, "copy_vldr_vstr") == 0) {
107 bench = new CopyVldrVstrBenchmark();
108 } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
109 bench = new CopyVldmiaVstmiaBenchmark();
110 } else if (strcmp(name, "memcpy") == 0) {
111 bench = new MemcpyBenchmark();
112 } else if (strcmp(name, "write_strd") == 0) {
113 bench = new WriteStrdBenchmark();
114 } else if (strcmp(name, "write_stmia") == 0) {
115 bench = new WriteStmiaBenchmark();
116 } else if (strcmp(name, "write_vst1") == 0) {
117 bench = new WriteVst1Benchmark();
118 } else if (strcmp(name, "write_vstr") == 0) {
119 bench = new WriteVstrBenchmark();
120 } else if (strcmp(name, "write_vstmia") == 0) {
121 bench = new WriteVstmiaBenchmark();
122 } else if (strcmp(name, "memset") == 0) {
123 bench = new MemsetBenchmark();
124 } else if (strcmp(name, "read_ldrd") == 0) {
125 bench = new ReadLdrdBenchmark();
126 } else if (strcmp(name, "read_ldmia") == 0) {
127 bench = new ReadLdmiaBenchmark();
128 } else if (strcmp(name, "read_vld1") == 0) {
129 bench = new ReadVld1Benchmark();
130 } else if (strcmp(name, "read_vldr") == 0) {
131 bench = new ReadVldrBenchmark();
132 } else if (strcmp(name, "read_vldmia") == 0) {
133 bench = new ReadVldmiaBenchmark();
134 } else {
135 printf("Unknown type name %s\n", name);
136 return NULL;
137 }
138
139 if (!bench->setSize(values["size"].int_value)) {
140 printf("Failed to allocate buffers for benchmark.\n");
141 return NULL;
142 }
143
144 if (values.count("num_warm_loops") > 0) {
145 bench->set_num_loops(values["num_warm_loops"].int_value);
146 }
147 if (values.count("num_loops") > 0) {
148 bench->set_num_loops(values["num_loops"].int_value);
149 }
150
151 return bench;
152 }
153
/*
 * Appends to *cpu_list, in ascending order, the index of every CPU present
 * in the calling thread's affinity mask.
 *
 * Returns false (after perror()) if sched_getaffinity() fails; the list is
 * left untouched in that case.
 */
bool getAvailCpus(std::vector<int> *cpu_list) {
    cpu_set_t allowed;
    CPU_ZERO(&allowed);

    const int rc = sched_getaffinity(0, sizeof(allowed), &allowed);
    if (rc != 0) {
        perror("sched_getaffinity failed.");
        return false;
    }

    int cpu = 0;
    while (cpu < CPU_SETSIZE) {
        if (CPU_ISSET(cpu, &allowed)) {
            cpu_list->push_back(cpu);
        }
        ++cpu;
    }

    return true;
}
171
172 typedef struct {
173 int core;
174 BandwidthBenchmark *bench;
175 double avg_mb;
176 volatile bool *run;
177 } thread_arg_t;
178
runBandwidthThread(void * data)179 void *runBandwidthThread(void *data) {
180 thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
181
182 if (arg->core >= 0) {
183 cpu_set_t cpuset;
184 CPU_ZERO(&cpuset);
185 CPU_SET(arg->core, &cpuset);
186 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
187 perror("sched_setaffinity failed");
188 return NULL;
189 }
190 }
191
192 // Spinloop waiting for the run variable to get set to true.
193 while (!*arg->run) {
194 }
195
196 double avg_mb = 0;
197 for (int run = 1; ; run++) {
198 arg->bench->run();
199 if (!*arg->run) {
200 // Throw away the last data point since it's possible not
201 // all of the threads are running at this point.
202 break;
203 }
204 avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
205 }
206 arg->avg_mb = avg_mb;
207
208 return NULL;
209 }
210
processThreadArgs(int argc,char ** argv,option_t options[],arg_t * values)211 bool processThreadArgs(int argc, char** argv, option_t options[],
212 arg_t *values) {
213 // Use some smaller values for the number of loops.
214 (*values)["num_warm_loops"].int_value = 1000000;
215 (*values)["num_loops"].int_value = 10000000;
216
217 if (!processBandwidthOptions(argc, argv, options, values)) {
218 return false;
219 }
220 if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
221 printf("The size values must be a multiple of 64.\n");
222 return false;
223 }
224 if (values->count("type") == 0) {
225 printf("Must specify the type value.\n");
226 return false;
227 }
228
229 BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
230 if (!bench) {
231 return false;
232 }
233
234 if (setpriority(PRIO_PROCESS, 0, -20)) {
235 perror("Unable to raise priority of process.");
236 return false;
237 }
238
239 printf("Calculating optimum run time...\n");
240 nsecs_t t = system_time();
241 bench->run();
242 t = system_time() - t;
243 // Since this is only going to be running single threaded, assume that
244 // if the number is set to ten times this value, we should get at least
245 // a couple of samples per thread.
246 int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
247
248 (*values)["run_time"].int_value = run_time;
249 (*values)["size"].int_value = bench->size();
250 (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
251 (*values)["num_loops"].int_value = bench->num_loops();
252 delete bench;
253
254 return true;
255 }
256
runThreadedTest(thread_arg_t args[],int num_threads,int run_time)257 bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
258 pthread_t threads[num_threads];
259 volatile bool run = false;
260
261 int rc;
262 for (int i = 0; i < num_threads; i++) {
263 args[i].run = &run;
264 rc = pthread_create(&threads[i], NULL, runBandwidthThread,
265 (void*)&args[i]);
266 if (rc != 0) {
267 printf("Failed to launch thread %d\n", i);
268 return false;
269 }
270 }
271
272 // Kick start the threads.
273 run = true;
274
275 // Let the threads run.
276 sleep(run_time);
277
278 // Stop the threads.
279 run = false;
280
281 // Wait for the threads to complete.
282 for (int i = 0; i < num_threads; i++) {
283 rc = pthread_join(threads[i], NULL);
284 if (rc != 0) {
285 printf("Thread %d failed to join.\n", i);
286 return false;
287 }
288 printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
289 args[i].bench->getName(), args[i].avg_mb);
290 }
291
292 return true;
293 }
294
per_core_bandwidth(int argc,char ** argv)295 int per_core_bandwidth(int argc, char** argv) {
296 arg_t values;
297 if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
298 return -1;
299 }
300
301 std::vector<int> cpu_list;
302 if (!getAvailCpus(&cpu_list)) {
303 printf("Failed to get available cpu list.\n");
304 return -1;
305 }
306
307 thread_arg_t args[cpu_list.size()];
308
309 int i = 0;
310 for (std::vector<int>::iterator it = cpu_list.begin();
311 it != cpu_list.end(); ++it, ++i) {
312 args[i].core = *it;
313 args[i].bench = createBandwidthBenchmarkObject(values);
314 if (!args[i].bench) {
315 return -1;
316 }
317 }
318
319 printf("Running on %d cores\n", cpu_list.size());
320 printf(" run_time = %ds\n", values["run_time"].int_value);
321 printf(" size = %d\n", values["size"].int_value);
322 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value);
323 printf(" num_loops = %d\n", values["num_loops"].int_value);
324 printf("\n");
325
326 if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
327 return -1;
328 }
329
330 return 0;
331 }
332
multithread_bandwidth(int argc,char ** argv)333 int multithread_bandwidth(int argc, char** argv) {
334 arg_t values;
335 if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
336 return -1;
337 }
338 if (values.count("num_threads") == 0) {
339 printf("Must specify the num_threads value.\n");
340 return -1;
341 }
342 int num_threads = values["num_threads"].int_value;
343
344 thread_arg_t args[num_threads];
345
346 int i = 0;
347 for (int i = 0; i < num_threads; i++) {
348 args[i].core = -1;
349 args[i].bench = createBandwidthBenchmarkObject(values);
350 if (!args[i].bench) {
351 return -1;
352 }
353 }
354
355 printf("Running %d threads\n", num_threads);
356 printf(" run_time = %ds\n", values["run_time"].int_value);
357 printf(" size = %d\n", values["size"].int_value);
358 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value);
359 printf(" num_loops = %d\n", values["num_loops"].int_value);
360 printf("\n");
361
362 if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
363 return -1;
364 }
365
366 return 0;
367 }
368
run_bandwidth_benchmark(int argc,char ** argv,const char * name,std::vector<BandwidthBenchmark * > bench_objs)369 bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
370 std::vector<BandwidthBenchmark*> bench_objs) {
371 arg_t values;
372 values["size"].int_value = 0;
373 values["num_warm_loops"].int_value = 0;
374 values["num_loops"].int_value = 0;
375 if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
376 return false;
377 }
378
379 size_t size = values["size"].int_value;
380 if ((size % 64) != 0) {
381 printf("The size value must be a multiple of 64.\n");
382 return false;
383 }
384
385 if (setpriority(PRIO_PROCESS, 0, -20)) {
386 perror("Unable to raise priority of process.");
387 return false;
388 }
389
390 bool preamble_printed = false;
391 size_t num_warm_loops = values["num_warm_loops"].int_value;
392 size_t num_loops = values["num_loops"].int_value;
393 for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
394 it != bench_objs.end(); ++it) {
395 if (!(*it)->canRun()) {
396 continue;
397 }
398 if (!(*it)->setSize(values["num_warm_loops"].int_value)) {
399 printf("Failed creating buffer for bandwidth test.\n");
400 return false;
401 }
402 if (num_warm_loops) {
403 (*it)->set_num_warm_loops(num_warm_loops);
404 }
405 if (num_loops) {
406 (*it)->set_num_loops(num_loops);
407 }
408 if (!preamble_printed) {
409 preamble_printed = true;
410 printf("Benchmarking %s bandwidth\n", name);
411 printf(" size = %d\n", (*it)->size());
412 printf(" num_warm_loops = %d\n", (*it)->num_warm_loops());
413 printf(" num_loops = %d\n\n", (*it)->num_loops());
414 }
415 (*it)->run();
416 printf(" %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
417 (*it)->mb_per_sec());
418 }
419
420 return true;
421 }
422
copy_bandwidth(int argc,char ** argv)423 int copy_bandwidth(int argc, char** argv) {
424 std::vector<BandwidthBenchmark*> bench_objs;
425 bench_objs.push_back(new CopyLdrdStrdBenchmark());
426 bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
427 bench_objs.push_back(new CopyVld1Vst1Benchmark());
428 bench_objs.push_back(new CopyVldrVstrBenchmark());
429 bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
430 bench_objs.push_back(new MemcpyBenchmark());
431
432 if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
433 return -1;
434 }
435 return 0;
436 }
437
write_bandwidth(int argc,char ** argv)438 int write_bandwidth(int argc, char** argv) {
439 std::vector<BandwidthBenchmark*> bench_objs;
440 bench_objs.push_back(new WriteStrdBenchmark());
441 bench_objs.push_back(new WriteStmiaBenchmark());
442 bench_objs.push_back(new WriteVst1Benchmark());
443 bench_objs.push_back(new WriteVstrBenchmark());
444 bench_objs.push_back(new WriteVstmiaBenchmark());
445 bench_objs.push_back(new MemsetBenchmark());
446
447 if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
448 return -1;
449 }
450
451 return 0;
452 }
453
read_bandwidth(int argc,char ** argv)454 int read_bandwidth(int argc, char** argv) {
455 std::vector<BandwidthBenchmark*> bench_objs;
456 bench_objs.push_back(new ReadLdrdBenchmark());
457 bench_objs.push_back(new ReadLdmiaBenchmark());
458 bench_objs.push_back(new ReadVld1Benchmark());
459 bench_objs.push_back(new ReadVldrBenchmark());
460 bench_objs.push_back(new ReadVldmiaBenchmark());
461
462 if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
463 return -1;
464 }
465 return 0;
466 }
467