#include <binder/Binder.h>
#include <binder/IBinder.h>
#include <binder/IPCThreadState.h>
#include <binder/IServiceManager.h>

#include <algorithm>
#include <cerrno>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>

#include <sys/wait.h>
#include <unistd.h>
16
17 using namespace std;
18 using namespace android;
19
20 enum BinderWorkerServiceCode {
21 BINDER_NOP = IBinder::FIRST_CALL_TRANSACTION,
22 };
23
// Fatal runtime check: when `cond` evaluates to false, prints the enclosing
// function name, the line number and the text of the condition to stderr and
// terminates the process with EXIT_FAILURE.
// All library names are fully qualified so that the macro expands correctly
// even where no `using namespace std` is in effect. The do/while(0) wrapper
// makes the expansion behave as a single statement (safe inside if/else).
#define ASSERT_TRUE(cond) \
do { \
    if (!(cond)) { \
        std::cerr << __func__ << ":" << __LINE__ << " condition:" << #cond << " failed\n" << std::endl; \
        std::exit(EXIT_FAILURE); \
    } \
} while (0)
31
32 class BinderWorkerService : public BBinder
33 {
34 public:
BinderWorkerService()35 BinderWorkerService() {}
~BinderWorkerService()36 ~BinderWorkerService() {}
onTransact(uint32_t code,const Parcel & data,Parcel * reply,uint32_t flags=0)37 virtual status_t onTransact(uint32_t code,
38 const Parcel& data, Parcel* reply,
39 uint32_t flags = 0) {
40 (void)flags;
41 (void)data;
42 (void)reply;
43 switch (code) {
44 case BINDER_NOP:
45 return NO_ERROR;
46 default:
47 return UNKNOWN_TRANSACTION;
48 };
49 }
50 };
51
52 class Pipe {
53 int m_readFd;
54 int m_writeFd;
Pipe(int readFd,int writeFd)55 Pipe(int readFd, int writeFd) : m_readFd{readFd}, m_writeFd{writeFd} {}
56 Pipe(const Pipe &) = delete;
57 Pipe& operator=(const Pipe &) = delete;
58 Pipe& operator=(const Pipe &&) = delete;
59 public:
Pipe(Pipe && rval)60 Pipe(Pipe&& rval) noexcept {
61 m_readFd = rval.m_readFd;
62 m_writeFd = rval.m_writeFd;
63 rval.m_readFd = 0;
64 rval.m_writeFd = 0;
65 }
~Pipe()66 ~Pipe() {
67 if (m_readFd)
68 close(m_readFd);
69 if (m_writeFd)
70 close(m_writeFd);
71 }
signal()72 void signal() {
73 bool val = true;
74 int error = write(m_writeFd, &val, sizeof(val));
75 ASSERT_TRUE(error >= 0);
76 };
wait()77 void wait() {
78 bool val = false;
79 int error = read(m_readFd, &val, sizeof(val));
80 ASSERT_TRUE(error >= 0);
81 }
send(const T & v)82 template <typename T> void send(const T& v) {
83 int error = write(m_writeFd, &v, sizeof(T));
84 ASSERT_TRUE(error >= 0);
85 }
recv(T & v)86 template <typename T> void recv(T& v) {
87 int error = read(m_readFd, &v, sizeof(T));
88 ASSERT_TRUE(error >= 0);
89 }
createPipePair()90 static tuple<Pipe, Pipe> createPipePair() {
91 int a[2];
92 int b[2];
93
94 int error1 = pipe(a);
95 int error2 = pipe(b);
96 ASSERT_TRUE(error1 >= 0);
97 ASSERT_TRUE(error2 >= 0);
98
99 return make_tuple(Pipe(a[0], b[1]), Pipe(b[0], a[1]));
100 }
101 };
102
// Latency histogram configuration (all times in nanoseconds).
// num_buckets      : number of histogram buckets.
// max_time_bucket  : latency covered by the histogram; anything above it is
//                    counted as a "long" transaction and lands in the last bucket.
// time_per_bucket  : width of one bucket, derived from the two values above.
static const uint32_t num_buckets = 128;
static uint64_t max_time_bucket = 50ull * 1000000;
static uint64_t time_per_bucket = max_time_bucket / num_buckets;

// Per-process latency statistics. Objects are transferred between processes
// as raw bytes, so the member order is kept stable.
struct ProcResults {
    uint64_t m_worst = 0;
    uint32_t m_buckets[num_buckets] = {0};
    uint64_t m_transactions = 0;
    uint64_t m_long_transactions = 0;
    uint64_t m_total_time = 0;
    // Starts at the largest representable value so that the first recorded
    // sample always becomes the best, even when it exceeds max_time_bucket.
    uint64_t m_best = UINT64_MAX;

    // Records one transaction latency (nanoseconds).
    void add_time(uint64_t time) {
        if (time > max_time_bucket) {
            m_long_transactions++;
        }
        // Clamp in 64 bits before indexing so a huge quotient cannot wrap
        // into a low bucket; a zero bucket width sends everything to the
        // last bucket instead of dividing by zero.
        const uint64_t last_bucket = num_buckets - 1;
        const uint64_t raw_bucket = time_per_bucket ? time / time_per_bucket : last_bucket;
        m_buckets[std::min(raw_bucket, last_bucket)] += 1;
        m_best = std::min(time, m_best);
        m_worst = std::max(time, m_worst);
        m_transactions += 1;
        m_total_time += time;
    }

    // Returns the statistics of the union of the samples in `a` and `b`.
    static ProcResults combine(const ProcResults& a, const ProcResults& b) {
        ProcResults ret;
        for (uint32_t i = 0; i < num_buckets; i++) {
            ret.m_buckets[i] = a.m_buckets[i] + b.m_buckets[i];
        }
        ret.m_worst = std::max(a.m_worst, b.m_worst);
        ret.m_best = std::min(a.m_best, b.m_best);
        ret.m_transactions = a.m_transactions + b.m_transactions;
        ret.m_long_transactions = a.m_long_transactions + b.m_long_transactions;
        ret.m_total_time = a.m_total_time + b.m_total_time;
        return ret;
    }

    // Prints average/worst/best latency in milliseconds followed by the
    // 50/90/95/99th percentile estimates (bucket midpoints) to stdout.
    void dump() const {
        if (m_transactions == 0) {
            // Nothing was measured; avoid printing NaN averages.
            std::cout << "no transactions recorded" << std::endl;
            return;
        }
        if (m_long_transactions > 0) {
            // Scale by 100 so the printed number really is a percentage.
            std::cout << 100.0 * m_long_transactions / m_transactions << "% of transactions took longer "
                "than estimated max latency. Consider setting -m to be higher than "
                << m_worst / 1000 << " microseconds" << std::endl;
        }

        const double best = (double)m_best / 1.0E6;
        const double worst = (double)m_worst / 1.0E6;
        const double average = (double)m_total_time / m_transactions / 1.0E6;
        std::cout << "average:" << average << "ms worst:" << worst << "ms best:" << best << "ms" << std::endl;

        // A percentile is reported for the bucket in which the running sample
        // count first reaches pct% of all samples. Comparisons are done on
        // integers scaled by 100 to stay exact for large sample counts.
        static const uint32_t percentiles[] = {50, 90, 95, 99};
        const double time_per_bucket_ms = time_per_bucket / 1.0E6;
        uint64_t cur_total = 0;
        for (uint32_t i = 0; i < num_buckets; i++) {
            const double cur_time = time_per_bucket_ms * i + 0.5 * time_per_bucket_ms;
            const uint64_t next_total = cur_total + m_buckets[i];
            for (uint32_t pct : percentiles) {
                const uint64_t threshold = pct * m_transactions;
                if (cur_total * 100 < threshold && next_total * 100 >= threshold) {
                    std::cout << pct << "%: " << cur_time << " ";
                }
            }
            cur_total = next_total;
        }
        std::cout << std::endl;
    }
};
170
generateServiceName(int num)171 String16 generateServiceName(int num)
172 {
173 char num_str[32];
174 snprintf(num_str, sizeof(num_str), "%d", num);
175 String16 serviceName = String16("binderWorker") + String16(num_str);
176 return serviceName;
177 }
178
worker_fx(int num,int worker_count,int iterations,int payload_size,bool cs_pair,Pipe p)179 void worker_fx(int num,
180 int worker_count,
181 int iterations,
182 int payload_size,
183 bool cs_pair,
184 Pipe p)
185 {
186 // Create BinderWorkerService and for go.
187 ProcessState::self()->startThreadPool();
188 sp<IServiceManager> serviceMgr = defaultServiceManager();
189 sp<BinderWorkerService> service = new BinderWorkerService;
190 serviceMgr->addService(generateServiceName(num), service);
191
192 srand(num);
193 p.signal();
194 p.wait();
195
196 // If client/server pairs, then half the workers are
197 // servers and half are clients
198 int server_count = cs_pair ? worker_count / 2 : worker_count;
199
200 // Get references to other binder services.
201 cout << "Created BinderWorker" << num << endl;
202 (void)worker_count;
203 vector<sp<IBinder> > workers;
204 for (int i = 0; i < server_count; i++) {
205 if (num == i)
206 continue;
207 workers.push_back(serviceMgr->getService(generateServiceName(i)));
208 }
209
210 // Run the benchmark if client
211 ProcResults results;
212 chrono::time_point<chrono::high_resolution_clock> start, end;
213 for (int i = 0; (!cs_pair || num >= server_count) && i < iterations; i++) {
214 Parcel data, reply;
215 int target = cs_pair ? num % server_count : rand() % workers.size();
216 int sz = payload_size;
217
218 while (sz >= sizeof(uint32_t)) {
219 data.writeInt32(0);
220 sz -= sizeof(uint32_t);
221 }
222 start = chrono::high_resolution_clock::now();
223 status_t ret = workers[target]->transact(BINDER_NOP, data, &reply);
224 end = chrono::high_resolution_clock::now();
225
226 uint64_t cur_time = uint64_t(chrono::duration_cast<chrono::nanoseconds>(end - start).count());
227 results.add_time(cur_time);
228
229 if (ret != NO_ERROR) {
230 cout << "thread " << num << " failed " << ret << "i : " << i << endl;
231 exit(EXIT_FAILURE);
232 }
233 }
234
235 // Signal completion to master and wait.
236 p.signal();
237 p.wait();
238
239 // Send results to master and wait for go to exit.
240 p.send(results);
241 p.wait();
242
243 exit(EXIT_SUCCESS);
244 }
245
make_worker(int num,int iterations,int worker_count,int payload_size,bool cs_pair)246 Pipe make_worker(int num, int iterations, int worker_count, int payload_size, bool cs_pair)
247 {
248 auto pipe_pair = Pipe::createPipePair();
249 pid_t pid = fork();
250 if (pid) {
251 /* parent */
252 return std::move(get<0>(pipe_pair));
253 } else {
254 /* child */
255 worker_fx(num, worker_count, iterations, payload_size, cs_pair,
256 std::move(get<1>(pipe_pair)));
257 /* never get here */
258 return std::move(get<0>(pipe_pair));
259 }
260
261 }
262
wait_all(vector<Pipe> & v)263 void wait_all(vector<Pipe>& v)
264 {
265 for (int i = 0; i < v.size(); i++) {
266 v[i].wait();
267 }
268 }
269
signal_all(vector<Pipe> & v)270 void signal_all(vector<Pipe>& v)
271 {
272 for (int i = 0; i < v.size(); i++) {
273 v[i].signal();
274 }
275 }
276
run_main(int iterations,int workers,int payload_size,int cs_pair,bool training_round=false)277 void run_main(int iterations,
278 int workers,
279 int payload_size,
280 int cs_pair,
281 bool training_round=false)
282 {
283 vector<Pipe> pipes;
284 // Create all the workers and wait for them to spawn.
285 for (int i = 0; i < workers; i++) {
286 pipes.push_back(make_worker(i, iterations, workers, payload_size, cs_pair));
287 }
288 wait_all(pipes);
289
290 // Run the workers and wait for completion.
291 chrono::time_point<chrono::high_resolution_clock> start, end;
292 cout << "waiting for workers to complete" << endl;
293 start = chrono::high_resolution_clock::now();
294 signal_all(pipes);
295 wait_all(pipes);
296 end = chrono::high_resolution_clock::now();
297
298 // Calculate overall throughput.
299 double iterations_per_sec = double(iterations * workers) / (chrono::duration_cast<chrono::nanoseconds>(end - start).count() / 1.0E9);
300 cout << "iterations per sec: " << iterations_per_sec << endl;
301
302 // Collect all results from the workers.
303 cout << "collecting results" << endl;
304 signal_all(pipes);
305 ProcResults tot_results;
306 for (int i = 0; i < workers; i++) {
307 ProcResults tmp_results;
308 pipes[i].recv(tmp_results);
309 tot_results = ProcResults::combine(tot_results, tmp_results);
310 }
311
312 // Kill all the workers.
313 cout << "killing workers" << endl;
314 signal_all(pipes);
315 for (int i = 0; i < workers; i++) {
316 int status;
317 wait(&status);
318 if (status != 0) {
319 cout << "nonzero child status" << status << endl;
320 }
321 }
322 if (training_round) {
323 // sets max_time_bucket to 2 * m_worst from the training round.
324 // Also needs to adjust time_per_bucket accordingly.
325 max_time_bucket = 2 * tot_results.m_worst;
326 time_per_bucket = max_time_bucket / num_buckets;
327 cout << "Max latency during training: " << tot_results.m_worst / 1.0E6 << "ms" << endl;
328 } else {
329 tot_results.dump();
330 }
331 }
332
main(int argc,char * argv[])333 int main(int argc, char *argv[])
334 {
335 int workers = 2;
336 int iterations = 10000;
337 int payload_size = 0;
338 bool cs_pair = false;
339 bool training_round = false;
340 (void)argc;
341 (void)argv;
342
343 // Parse arguments.
344 for (int i = 1; i < argc; i++) {
345 if (string(argv[i]) == "--help") {
346 cout << "Usage: binderThroughputTest [OPTIONS]" << endl;
347 cout << "\t-i N : Specify number of iterations." << endl;
348 cout << "\t-m N : Specify expected max latency in microseconds." << endl;
349 cout << "\t-p : Split workers into client/server pairs." << endl;
350 cout << "\t-s N : Specify payload size." << endl;
351 cout << "\t-t N : Run training round." << endl;
352 cout << "\t-w N : Specify total number of workers." << endl;
353 return 0;
354 }
355 if (string(argv[i]) == "-w") {
356 workers = atoi(argv[i+1]);
357 i++;
358 continue;
359 }
360 if (string(argv[i]) == "-i") {
361 iterations = atoi(argv[i+1]);
362 i++;
363 continue;
364 }
365 if (string(argv[i]) == "-s") {
366 payload_size = atoi(argv[i+1]);
367 i++;
368 }
369 if (string(argv[i]) == "-p") {
370 // client/server pairs instead of spreading
371 // requests to all workers. If true, half
372 // the workers become clients and half servers
373 cs_pair = true;
374 }
375 if (string(argv[i]) == "-t") {
376 // Run one training round before actually collecting data
377 // to get an approximation of max latency.
378 training_round = true;
379 }
380 if (string(argv[i]) == "-m") {
381 // Caller specified the max latency in microseconds.
382 // No need to run training round in this case.
383 if (atoi(argv[i+1]) > 0) {
384 max_time_bucket = strtoull(argv[i+1], (char **)nullptr, 10) * 1000;
385 time_per_bucket = max_time_bucket / num_buckets;
386 i++;
387 } else {
388 cout << "Max latency -m must be positive." << endl;
389 exit(EXIT_FAILURE);
390 }
391 }
392 }
393
394 if (training_round) {
395 cout << "Start training round" << endl;
396 run_main(iterations, workers, payload_size, cs_pair, training_round=true);
397 cout << "Completed training round" << endl << endl;
398 }
399
400 run_main(iterations, workers, payload_size, cs_pair);
401 return 0;
402 }
403