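// NOTE: the lines originally here were code-viewer navigation links
// (Home, Line#, Scopes#, Navigate#, Raw, Download); they are not part of the
// program.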
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
#include "bandwidth.h"

#include <ctype.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>

#include <map>
#include <vector>
28 
29 
// Describes one command-line option accepted by the bandwidth tests.
// Option tables are arrays of these, terminated by an entry whose name is NULL.
typedef struct {
    const char *name;  // Option name as typed after the leading "--".
    bool int_type;     // true: value is parsed as an integer; false: kept as a string.
} option_t;
34 
35 option_t bandwidth_opts[] = {
36     { "size", true },
37     { "num_warm_loops", true },
38     { "num_loops", true },
39     { "type", false },
40     { NULL, false },
41 };
42 
43 option_t per_core_opts[] = {
44     { "size", true },
45     { "num_warm_loops", true},
46     { "num_loops", true },
47     { "type", false },
48     { NULL, false },
49 };
50 
51 option_t multithread_opts[] = {
52     { "size", true },
53     { "num_warm_loops", true},
54     { "num_loops", true },
55     { "type", false },
56     { "num_threads", true },
57     { NULL, false },
58 };
59 
// Value stored for one parsed option. Which member is meaningful is decided by
// the int_type flag of the option's entry in its option table.
typedef union {
    int int_value;
    const char *char_value;
} arg_value_t;

// Orders option names by their text. With the default comparator std::map
// would order the const char* keys by address, so a lookup such as
// values["size"] would only find the entry stored by the parser if the
// compiler happened to share a single copy of the identical string literals.
struct arg_name_less_t {
    bool operator()(const char *lhs, const char *rhs) const {
        return strcmp(lhs, rhs) < 0;
    }
};

// Parsed options, keyed by option name (compared as text).
typedef std::map<const char*, arg_value_t, arg_name_less_t> arg_t;
65 
processBandwidthOptions(int argc,char ** argv,option_t options[],arg_t * values)66 bool processBandwidthOptions(int argc, char** argv, option_t options[],
67                              arg_t *values) {
68     for (int i = 1; i < argc; i++) {
69         if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
70             char *arg = &argv[i][2];
71 
72             for (int j = 0; options[j].name != NULL; j++) {
73                 if (strcmp(arg, options[j].name) == 0) {
74                     const char *name = options[j].name;
75                     if (i == argc - 1) {
76                         printf("The option --%s requires an argument.\n", name);
77                         return false;
78                     }
79                     if (options[j].int_type) {
80                         (*values)[name].int_value = strtol(argv[++i], NULL, 0);
81                     } else {
82                         (*values)[name].char_value = argv[++i];
83                     }
84                 }
85             }
86         }
87     }
88 
89     return true;
90 }
91 
createBandwidthBenchmarkObject(arg_t values)92 BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
93     BandwidthBenchmark *bench = NULL;
94 
95     const char *name = values["type"].char_value;
96     size_t size = 0;
97     if (values.count("size") > 0) {
98         size = values["size"].int_value;
99     }
100     if (strcmp(name, "copy_ldrd_strd") == 0) {
101         bench = new CopyLdrdStrdBenchmark();
102     } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
103         bench = new CopyLdmiaStmiaBenchmark();
104     } else if (strcmp(name, "copy_vld1_vst1") == 0) {
105         bench = new CopyVld1Vst1Benchmark();
106     } else if (strcmp(name, "copy_vldr_vstr") == 0) {
107         bench = new CopyVldrVstrBenchmark();
108     } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
109         bench = new CopyVldmiaVstmiaBenchmark();
110     } else if (strcmp(name, "memcpy") == 0) {
111         bench = new MemcpyBenchmark();
112     } else if (strcmp(name, "write_strd") == 0) {
113         bench = new WriteStrdBenchmark();
114     } else if (strcmp(name, "write_stmia") == 0) {
115         bench = new WriteStmiaBenchmark();
116     } else if (strcmp(name, "write_vst1") == 0) {
117         bench = new WriteVst1Benchmark();
118     } else if (strcmp(name, "write_vstr") == 0) {
119         bench = new WriteVstrBenchmark();
120     } else if (strcmp(name, "write_vstmia") == 0) {
121         bench = new WriteVstmiaBenchmark();
122     } else if (strcmp(name, "memset") == 0) {
123         bench = new MemsetBenchmark();
124     } else if (strcmp(name, "read_ldrd") == 0) {
125         bench = new ReadLdrdBenchmark();
126     } else if (strcmp(name, "read_ldmia") == 0) {
127         bench = new ReadLdmiaBenchmark();
128     } else if (strcmp(name, "read_vld1") == 0) {
129         bench = new ReadVld1Benchmark();
130     } else if (strcmp(name, "read_vldr") == 0) {
131         bench = new ReadVldrBenchmark();
132     } else if (strcmp(name, "read_vldmia") == 0) {
133         bench = new ReadVldmiaBenchmark();
134     } else {
135         printf("Unknown type name %s\n", name);
136         return NULL;
137     }
138 
139     if (!bench->setSize(size)) {
140         printf("Failed to allocate buffers for benchmark.\n");
141         delete bench;
142         return NULL;
143     }
144 
145     if (values.count("num_warm_loops") > 0) {
146         bench->set_num_loops(values["num_warm_loops"].int_value);
147     }
148     if (values.count("num_loops") > 0) {
149         bench->set_num_loops(values["num_loops"].int_value);
150     }
151 
152     return bench;
153 }
154 
// Appends to |cpu_list| the index of every CPU in the calling thread's
// affinity mask, in increasing order.
//
// Returns false, after reporting the error with perror(), when
// sched_getaffinity() fails; |cpu_list| is left untouched in that case.
bool getAvailCpus(std::vector<int> *cpu_list) {
    cpu_set_t cpuset;

    CPU_ZERO(&cpuset);
    if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_getaffinity failed.");
        return false;
    }

    for (int i = 0; i < CPU_SETSIZE; i++) {
        if (CPU_ISSET(i, &cpuset)) {
            cpu_list->push_back(i);
        }
    }

    return true;
}
172 
173 typedef struct {
174     int core;
175     BandwidthBenchmark *bench;
176     double  avg_mb;
177     volatile bool *run;
178 } thread_arg_t;
179 
runBandwidthThread(void * data)180 void *runBandwidthThread(void *data) {
181     thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
182 
183     if (arg->core >= 0) {
184         cpu_set_t cpuset;
185         CPU_ZERO(&cpuset);
186         CPU_SET(arg->core, &cpuset);
187         if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
188             perror("sched_setaffinity failed");
189             return NULL;
190         }
191     }
192 
193     // Spinloop waiting for the run variable to get set to true.
194     while (!*arg->run) {
195     }
196 
197     double avg_mb = 0;
198     for (int run = 1; ; run++) {
199         arg->bench->run();
200         if (!*arg->run) {
201             // Throw away the last data point since it's possible not
202             // all of the threads are running at this point.
203             break;
204         }
205         avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
206     }
207     arg->avg_mb = avg_mb;
208 
209     return NULL;
210 }
211 
processThreadArgs(int argc,char ** argv,option_t options[],arg_t * values)212 bool processThreadArgs(int argc, char** argv, option_t options[],
213                        arg_t *values) {
214     // Use some smaller values for the number of loops.
215     (*values)["num_warm_loops"].int_value = 1000000;
216     (*values)["num_loops"].int_value = 10000000;
217 
218     if (!processBandwidthOptions(argc, argv, options, values)) {
219         return false;
220     }
221     if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
222         printf("The size values must be a multiple of 64.\n");
223         return false;
224     }
225     if (values->count("type") == 0) {
226         printf("Must specify the type value.\n");
227         return false;
228     }
229 
230     BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
231     if (!bench) {
232         return false;
233     }
234 
235     if (setpriority(PRIO_PROCESS, 0, -20)) {
236         perror("Unable to raise priority of process.");
237         return false;
238     }
239 
240     printf("Calculating optimum run time...\n");
241     nsecs_t t = system_time();
242     bench->run();
243     t = system_time() - t;
244     // Since this is only going to be running single threaded, assume that
245     // if the number is set to ten times this value, we should get at least
246     // a couple of samples per thread.
247     int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
248 
249     (*values)["run_time"].int_value = run_time;
250     (*values)["size"].int_value = bench->size();
251     (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
252     (*values)["num_loops"].int_value = bench->num_loops();
253     delete bench;
254 
255     return true;
256 }
257 
runThreadedTest(thread_arg_t args[],int num_threads,int run_time)258 bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
259     pthread_t threads[num_threads];
260     volatile bool run = false;
261 
262     int rc;
263     for (int i = 0; i < num_threads; i++) {
264         args[i].run = &run;
265         rc = pthread_create(&threads[i], NULL, runBandwidthThread,
266                             (void*)&args[i]);
267         if (rc != 0) {
268             printf("Failed to launch thread %d\n", i);
269             return false;
270         }
271     }
272 
273     // Kick start the threads.
274     run = true;
275 
276     // Let the threads run.
277     sleep(run_time);
278 
279     // Stop the threads.
280     run = false;
281 
282     // Wait for the threads to complete.
283     for (int i = 0; i < num_threads; i++) {
284         rc = pthread_join(threads[i], NULL);
285         if (rc != 0) {
286             printf("Thread %d failed to join.\n", i);
287             return false;
288         }
289         printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
290                args[i].bench->getName(), args[i].avg_mb);
291     }
292 
293     return true;
294 }
295 
per_core_bandwidth(int argc,char ** argv)296 int per_core_bandwidth(int argc, char** argv) {
297     arg_t values;
298     if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
299         return -1;
300     }
301 
302     std::vector<int> cpu_list;
303     if (!getAvailCpus(&cpu_list)) {
304         printf("Failed to get available cpu list.\n");
305         return -1;
306     }
307 
308     thread_arg_t args[cpu_list.size()];
309 
310     int i = 0;
311     for (std::vector<int>::iterator it = cpu_list.begin();
312          it != cpu_list.end(); ++it, ++i) {
313         args[i].core = *it;
314         args[i].bench = createBandwidthBenchmarkObject(values);
315         if (!args[i].bench) {
316             for (int j = 0; j < i; j++)
317                 delete args[j].bench;
318             return -1;
319         }
320     }
321 
322     printf("Running on %d cores\n", cpu_list.size());
323     printf("  run_time = %ds\n", values["run_time"].int_value);
324     printf("  size = %d\n", values["size"].int_value);
325     printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
326     printf("  num_loops = %d\n", values["num_loops"].int_value);
327     printf("\n");
328 
329     if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
330         return -1;
331     }
332 
333     return 0;
334 }
335 
multithread_bandwidth(int argc,char ** argv)336 int multithread_bandwidth(int argc, char** argv) {
337     arg_t values;
338     if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
339         return -1;
340     }
341     if (values.count("num_threads") == 0) {
342         printf("Must specify the num_threads value.\n");
343         return -1;
344     }
345     int num_threads = values["num_threads"].int_value;
346 
347     thread_arg_t args[num_threads];
348 
349     for (int i = 0; i < num_threads; i++) {
350         args[i].core = -1;
351         args[i].bench = createBandwidthBenchmarkObject(values);
352         if (!args[i].bench) {
353             for (int j = 0; j < i; j++)
354                 delete args[j].bench;
355             return -1;
356         }
357     }
358 
359     printf("Running %d threads\n", num_threads);
360     printf("  run_time = %ds\n", values["run_time"].int_value);
361     printf("  size = %d\n", values["size"].int_value);
362     printf("  num_warm_loops = %d\n", values["num_warm_loops"].int_value);
363     printf("  num_loops = %d\n", values["num_loops"].int_value);
364     printf("\n");
365 
366     if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
367         return -1;
368     }
369 
370     return 0;
371 }
372 
run_bandwidth_benchmark(int argc,char ** argv,const char * name,std::vector<BandwidthBenchmark * > bench_objs)373 bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
374                              std::vector<BandwidthBenchmark*> bench_objs) {
375     arg_t values;
376     values["size"].int_value = 0;
377     values["num_warm_loops"].int_value = 0;
378     values["num_loops"].int_value = 0;
379     if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
380         return false;
381     }
382 
383     size_t size = values["size"].int_value;
384     if ((size % 64) != 0) {
385         printf("The size value must be a multiple of 64.\n");
386         return false;
387     }
388 
389     if (setpriority(PRIO_PROCESS, 0, -20)) {
390         perror("Unable to raise priority of process.");
391         return false;
392     }
393 
394     bool preamble_printed = false;
395     size_t num_warm_loops = values["num_warm_loops"].int_value;
396     size_t num_loops = values["num_loops"].int_value;
397     for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
398          it != bench_objs.end(); ++it) {
399         if (!(*it)->canRun()) {
400             continue;
401         }
402         if (!(*it)->setSize(values["size"].int_value)) {
403             printf("Failed creating buffer for bandwidth test.\n");
404             return false;
405         }
406         if (num_warm_loops) {
407             (*it)->set_num_warm_loops(num_warm_loops);
408         }
409         if (num_loops) {
410             (*it)->set_num_loops(num_loops);
411         }
412         if (!preamble_printed) {
413             preamble_printed = true;
414             printf("Benchmarking %s bandwidth\n", name);
415             printf("  size = %d\n", (*it)->size());
416             printf("  num_warm_loops = %d\n", (*it)->num_warm_loops());
417             printf("  num_loops = %d\n\n", (*it)->num_loops());
418         }
419         (*it)->run();
420         printf("  %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
421                (*it)->mb_per_sec());
422     }
423 
424     return true;
425 }
426 
copy_bandwidth(int argc,char ** argv)427 int copy_bandwidth(int argc, char** argv) {
428     std::vector<BandwidthBenchmark*> bench_objs;
429     bench_objs.push_back(new CopyLdrdStrdBenchmark());
430     bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
431     bench_objs.push_back(new CopyVld1Vst1Benchmark());
432     bench_objs.push_back(new CopyVldrVstrBenchmark());
433     bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
434     bench_objs.push_back(new MemcpyBenchmark());
435 
436     if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
437         return -1;
438     }
439     return 0;
440 }
441 
write_bandwidth(int argc,char ** argv)442 int write_bandwidth(int argc, char** argv) {
443     std::vector<BandwidthBenchmark*> bench_objs;
444     bench_objs.push_back(new WriteStrdBenchmark());
445     bench_objs.push_back(new WriteStmiaBenchmark());
446     bench_objs.push_back(new WriteVst1Benchmark());
447     bench_objs.push_back(new WriteVstrBenchmark());
448     bench_objs.push_back(new WriteVstmiaBenchmark());
449     bench_objs.push_back(new MemsetBenchmark());
450 
451     if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
452         return -1;
453     }
454 
455     return 0;
456 }
457 
read_bandwidth(int argc,char ** argv)458 int read_bandwidth(int argc, char** argv) {
459     std::vector<BandwidthBenchmark*> bench_objs;
460     bench_objs.push_back(new ReadLdrdBenchmark());
461     bench_objs.push_back(new ReadLdmiaBenchmark());
462     bench_objs.push_back(new ReadVld1Benchmark());
463     bench_objs.push_back(new ReadVldrBenchmark());
464     bench_objs.push_back(new ReadVldmiaBenchmark());
465 
466     if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
467         return -1;
468     }
469     return 0;
470 }
471