1 /* Copyright (C) 2006-2007 The Android Open Source Project
2 **
3 ** This software is licensed under the terms of the GNU General Public
4 ** License version 2, as published by the Free Software Foundation, and
5 ** may be copied, distributed, and modified under those terms.
6 **
7 ** This program is distributed in the hope that it will be useful,
8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 ** GNU General Public License for more details.
11 */
12
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <limits.h>
17 #include <inttypes.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 #include <errno.h>
21 #include <sys/time.h>
22 #include <time.h>
23 #include "cpu.h"
24 #include "exec-all.h"
25 #include "trace.h"
26 #include "varint.h"
27
28 TraceBB trace_bb;
29 TraceInsn trace_insn;
30 TraceStatic trace_static;
31 TraceAddr trace_load;
32 TraceAddr trace_store;
33 TraceExc trace_exc;
34 TracePid trace_pid;
35 TraceMethod trace_method;
36 static TraceHeader header;
37
// Name of the trace output.
// NOTE(review): nothing in this part of the file assigns or reads it;
// presumably it is set by the option-parsing code -- confirm.
const char *trace_filename;

// Non-zero while tracing is active.  trace_init() and trace_cleanup() only
// sample the wall clock when it is set.
int tracing;

// If either flag is non-zero, create_trace_addr() opens the optional
// "qtrace.load" and "qtrace.store" files.
int trace_cache_miss;
int trace_all_addr;

// The simulation time in cpu clock cycles
uint64_t sim_time = 1;

// The current process id
int current_pid;

// Wall-clock start and end time, in microseconds
uint64_t start_time;
uint64_t end_time;

// Wall-clock microseconds accumulated so far; trace_cleanup() adds the
// final (end_time - start_time) interval to it.
uint64_t elapsed_usecs;

// Stream for debugging output; trace_init() leaves it NULL unless the
// debug file is compiled in.
FILE *ftrace_debug;
55
// Worst-case encoded sizes, in bytes, of a single record of each kind.
// The values are deliberately conservative: where a high-water mark is
// used, it is placed this many bytes before the end of the compressed
// buffer so that writing one more record can never overflow it.

// One InsnRec after compression.
#define kMaxInsnCompressed 14

// One BBRec after compression.
#define kMaxBBCompressed 32

// One AddrRec after compression.
#define kMaxAddrCompressed 14

// One MethodRec after compression.
#define kMaxMethodCompressed 18

// One exception record after compression.
#define kMaxExcCompressed 38

// One pid record for kPidSwitch or kPidExit after compression.
#define kMaxPidCompressed 15

// One pid record for kPidFork or kPidClone after compression.
#define kMaxPid2Compressed 20

// One pid record for kPidExecArgs after compression, not counting the
// bytes for the args.
#define kMaxExecArgsCompressed 15

// One pid record for kPidName after compression, not counting the bytes
// for the name.
#define kMaxNameCompressed 20

// One pid record for kPidMmap after compression, not counting the bytes
// for the pathname.
#define kMaxMmapCompressed 33

// One pid record for kPidMunmap after compression.
#define kMaxMunmapCompressed 28

// One pid record for kPidSymbol after compression, not counting the bytes
// for the symbol name.
#define kMaxSymbolCompressed 24

// One pid record for kPidKthreadName after compression, not counting the
// bytes for the name.
#define kMaxKthreadNameCompressed 25

// Defined further down; declared here so that trace_init() can register it
// with atexit().
void trace_cleanup();
109
110 // Return current time in microseconds as a 64-bit integer.
Now()111 uint64 Now() {
112 struct timeval tv;
113
114 gettimeofday(&tv, NULL);
115 uint64 val = tv.tv_sec;
116 val = val * 1000000ull + tv.tv_usec;
117 return val;
118 }
119
/*
 * Create the directory that will hold the trace files.
 *
 * dirname: path of the directory to create (mode 0755).
 *
 * A directory that already exists is accepted silently.  Any other
 * failure is fatal: the return code and the system error text are
 * printed and the process exits with status 1.
 */
static void create_trace_dir(const char *dirname)
{
    int err = path_mkdir(dirname, 0755);
    if (err == 0)
        return;

    // Capture errno right away.  printf() below is allowed to modify it,
    // which would make perror() describe the wrong failure.
    int saved_errno = errno;
    if (saved_errno == EEXIST)
        return;

    printf("err: %d\n", err);
    errno = saved_errno;
    perror(dirname);
    exit(1);
}
131
/*
 * Build the pathname of one trace file: "<filename>/qtrace<ext>".
 *
 * filename: the trace directory; a single trailing slash is ignored.
 * ext:      the file extension, including its leading dot (e.g. ".bb").
 *
 * Returns a malloc()ed string owned by the caller, or NULL when filename
 * is NULL, empty, or exactly "/".  Running out of memory is fatal (the
 * error is reported and the process exits with status 1), in line with
 * the other set-up failures in this file.
 */
static char *create_trace_path(const char *filename, const char *ext)
{
    static const char stem[] = "/qtrace";
    const size_t stem_len = sizeof(stem) - 1;

    // Handle error cases
    if (filename == NULL || *filename == 0 || strcmp(filename, "/") == 0)
        return NULL;

    // Ignore a trailing slash, if any.  filename is known to be non-empty
    // here, so indexing the last character is safe.
    size_t dir_len = strlen(filename);
    if (filename[dir_len - 1] == '/')
        dir_len -= 1;

    // The extension already contains the dot, so only one extra byte is
    // needed, for the terminating zero.
    size_t ext_len = strlen(ext);
    char *path = malloc(dir_len + stem_len + ext_len + 1);
    if (path == NULL) {
        perror("malloc");
        exit(1);
    }

    // All lengths are known, so assemble the pieces directly; the last
    // copy also brings along the terminating zero of ext.
    memcpy(path, filename, dir_len);
    memcpy(path + dir_len, stem, stem_len);
    memcpy(path + dir_len + stem_len, ext, ext_len + 1);
    return path;
}
175
// Encode a value as two packed decimal digits: the tens digit in the high
// nibble and the ones digit in the low nibble.
static uint32_t trace_bcd_pair(uint32_t value)
{
    return ((value / 10) << 4) | (value % 10);
}

/*
 * Convert a time in seconds to local date and time words whose decimal
 * digits are packed one per nibble.
 *
 * secs:  time in seconds, interpreted through localtime().
 * pdate: receives 0xYYYYMMDD (four year digits, month 01-12, day).
 * ptime: receives 0x00HHMMSS (hour, minute, second).
 */
void convert_secs_to_date_time(time_t secs, uint32_t *pdate, uint32_t *ptime)
{
    const struct tm *local = localtime(&secs);

    // The year needs four digits, so it is packed by hand.
    uint32_t year = local->tm_year + 1900;
    uint32_t year_bcd = ((year / 1000) << 12)
                      | (((year / 100) % 10) << 8)
                      | (((year / 10) % 10) << 4)
                      | (year % 10);

    // tm_mon counts from zero; the encoded month counts from one.
    uint32_t mon_bcd = trace_bcd_pair(local->tm_mon + 1);
    uint32_t day_bcd = trace_bcd_pair(local->tm_mday);
    *pdate = (year_bcd << 16) | (mon_bcd << 8) | day_bcd;

    uint32_t hour_bcd = trace_bcd_pair(local->tm_hour);
    uint32_t min_bcd = trace_bcd_pair(local->tm_min);
    uint32_t sec_bcd = trace_bcd_pair(local->tm_sec);
    *ptime = (hour_bcd << 16) | (min_bcd << 8) | sec_bcd;
}
218
/*
 * Write a trace header at the current position of the static trace file
 * (trace_static.fstream).
 *
 * header: the header to write; it is not modified.
 *
 * Every multi-byte field of a private copy is passed through
 * convert32()/convert64() before the copy is written.
 * NOTE(review): those helpers are defined outside this file; presumably
 * they convert from host to on-disk byte order -- confirm.
 */
void write_trace_header(TraceHeader *header)
{
    TraceHeader out;

    // Convert a copy so that the caller's header is left untouched.
    memcpy(&out, header, sizeof(out));

    // 32-bit fields
    convert32(out.version);
    convert32(out.start_sec);
    convert32(out.start_usec);
    convert32(out.pdate);
    convert32(out.ptime);
    convert32(out.num_used_pids);
    convert32(out.first_unused_pid);

    // 64-bit fields
    convert64(out.num_static_bb);
    convert64(out.num_static_insn);
    convert64(out.num_dynamic_bb);
    convert64(out.num_dynamic_insn);
    convert64(out.elapsed_usecs);

    fwrite(&out, sizeof(out), 1, trace_static.fstream);
}
240
create_trace_bb(const char * filename)241 void create_trace_bb(const char *filename)
242 {
243 char *fname = create_trace_path(filename, ".bb");
244 trace_bb.filename = fname;
245
246 FILE *fstream = fopen(fname, "wb");
247 if (fstream == NULL) {
248 perror(fname);
249 exit(1);
250 }
251 trace_bb.fstream = fstream;
252 trace_bb.next = &trace_bb.buffer[0];
253 trace_bb.flush_time = 0;
254 trace_bb.compressed_ptr = trace_bb.compressed;
255 trace_bb.high_water_ptr = &trace_bb.compressed[kCompressedSize] - kMaxBBCompressed;
256 trace_bb.prev_bb_num = 0;
257 trace_bb.prev_bb_time = 0;
258 trace_bb.num_insns = 0;
259 trace_bb.recnum = 0;
260 }
261
create_trace_insn(const char * filename)262 void create_trace_insn(const char *filename)
263 {
264 // Create the instruction time trace file
265 char *fname = create_trace_path(filename, ".insn");
266 trace_insn.filename = fname;
267
268 FILE *fstream = fopen(fname, "wb");
269 if (fstream == NULL) {
270 perror(fname);
271 exit(1);
272 }
273 trace_insn.fstream = fstream;
274 trace_insn.current = &trace_insn.dummy;
275 trace_insn.dummy.time_diff = 0;
276 trace_insn.dummy.repeat = 0;
277 trace_insn.prev_time = 0;
278 trace_insn.compressed_ptr = trace_insn.compressed;
279 trace_insn.high_water_ptr = &trace_insn.compressed[kCompressedSize] - kMaxInsnCompressed;
280 }
281
create_trace_static(const char * filename)282 void create_trace_static(const char *filename)
283 {
284 // Create the static basic block trace file
285 char *fname = create_trace_path(filename, ".static");
286 trace_static.filename = fname;
287
288 FILE *fstream = fopen(fname, "wb");
289 if (fstream == NULL) {
290 perror(fname);
291 exit(1);
292 }
293 trace_static.fstream = fstream;
294 trace_static.next_insn = 0;
295 trace_static.bb_num = 1;
296 trace_static.bb_addr = 0;
297
298 // Write an empty header to reserve space for it in the file.
299 // The header will be filled in later when post-processing the
300 // trace file.
301 memset(&header, 0, sizeof(TraceHeader));
302
303 // Write out the version number so that tools can detect if the trace
304 // file format is the same as what they expect.
305 header.version = TRACE_VERSION;
306
307 // Record the start time in the header now.
308 struct timeval tv;
309 struct timezone tz;
310 gettimeofday(&tv, &tz);
311 header.start_sec = tv.tv_sec;
312 header.start_usec = tv.tv_usec;
313 convert_secs_to_date_time(header.start_sec, &header.pdate, &header.ptime);
314 write_trace_header(&header);
315
316 // Write out the record for the unused basic block number 0.
317 uint64_t zero = 0;
318 fwrite(&zero, sizeof(uint64_t), 1, trace_static.fstream); // bb_num
319 fwrite(&zero, sizeof(uint32_t), 1, trace_static.fstream); // bb_addr
320 fwrite(&zero, sizeof(uint32_t), 1, trace_static.fstream); // num_insns
321 }
322
create_trace_addr(const char * filename)323 void create_trace_addr(const char *filename)
324 {
325 // The "qtrace.load" and "qtrace.store" files are optional
326 trace_load.fstream = NULL;
327 trace_store.fstream = NULL;
328 if (trace_all_addr || trace_cache_miss) {
329 // Create the "qtrace.load" file
330 char *fname = create_trace_path(filename, ".load");
331 trace_load.filename = fname;
332
333 FILE *fstream = fopen(fname, "wb");
334 if (fstream == NULL) {
335 perror(fname);
336 exit(1);
337 }
338 trace_load.fstream = fstream;
339 trace_load.next = &trace_load.buffer[0];
340 trace_load.compressed_ptr = trace_load.compressed;
341 trace_load.high_water_ptr = &trace_load.compressed[kCompressedSize] - kMaxAddrCompressed;
342 trace_load.prev_addr = 0;
343 trace_load.prev_time = 0;
344
345 // Create the "qtrace.store" file
346 fname = create_trace_path(filename, ".store");
347 trace_store.filename = fname;
348
349 fstream = fopen(fname, "wb");
350 if (fstream == NULL) {
351 perror(fname);
352 exit(1);
353 }
354 trace_store.fstream = fstream;
355 trace_store.next = &trace_store.buffer[0];
356 trace_store.compressed_ptr = trace_store.compressed;
357 trace_store.high_water_ptr = &trace_store.compressed[kCompressedSize] - kMaxAddrCompressed;
358 trace_store.prev_addr = 0;
359 trace_store.prev_time = 0;
360 }
361 }
362
create_trace_exc(const char * filename)363 void create_trace_exc(const char *filename)
364 {
365 // Create the exception trace file
366 char *fname = create_trace_path(filename, ".exc");
367 trace_exc.filename = fname;
368
369 FILE *fstream = fopen(fname, "wb");
370 if (fstream == NULL) {
371 perror(fname);
372 exit(1);
373 }
374 trace_exc.fstream = fstream;
375 trace_exc.compressed_ptr = trace_exc.compressed;
376 trace_exc.high_water_ptr = &trace_exc.compressed[kCompressedSize] - kMaxExcCompressed;
377 trace_exc.prev_time = 0;
378 trace_exc.prev_bb_recnum = 0;
379 }
380
create_trace_pid(const char * filename)381 void create_trace_pid(const char *filename)
382 {
383 // Create the pid trace file
384 char *fname = create_trace_path(filename, ".pid");
385 trace_pid.filename = fname;
386
387 FILE *fstream = fopen(fname, "wb");
388 if (fstream == NULL) {
389 perror(fname);
390 exit(1);
391 }
392 trace_pid.fstream = fstream;
393 trace_pid.compressed_ptr = trace_pid.compressed;
394 trace_pid.prev_time = 0;
395 }
396
create_trace_method(const char * filename)397 void create_trace_method(const char *filename)
398 {
399 // Create the method trace file
400 char *fname = create_trace_path(filename, ".method");
401 trace_method.filename = fname;
402
403 FILE *fstream = fopen(fname, "wb");
404 if (fstream == NULL) {
405 perror(fname);
406 exit(1);
407 }
408 trace_method.fstream = fstream;
409 trace_method.compressed_ptr = trace_method.compressed;
410 trace_method.prev_time = 0;
411 trace_method.prev_addr = 0;
412 trace_method.prev_pid = 0;
413 }
414
trace_init(const char * filename)415 void trace_init(const char *filename)
416 {
417 // Create the trace files
418 create_trace_dir(filename);
419 create_trace_bb(filename);
420 create_trace_insn(filename);
421 create_trace_static(filename);
422 create_trace_addr(filename);
423 create_trace_exc(filename);
424 create_trace_pid(filename);
425 create_trace_method(filename);
426
427 #if 0
428 char *fname = create_trace_path(filename, ".debug");
429 ftrace_debug = fopen(fname, "wb");
430 if (ftrace_debug == NULL) {
431 perror(fname);
432 exit(1);
433 }
434 #else
435 ftrace_debug = NULL;
436 #endif
437 atexit(trace_cleanup);
438
439 // If tracing is on, then start timing the simulator
440 if (tracing)
441 start_time = Now();
442 }
443
/* Def-use register interlock tracking.  Interlocks can be computed
 * statically (ignoring condition codes), one basic block at a time.
 *
 * interlock_base is the number of cycles "executed" since the start of
 * the current basic block.  It is reset to 0 in trace_bb_start() and
 * advanced by every call to get_insn_ticks_arm().
 *
 * interlocks[N] is the value of interlock_base from which register N can
 * be used by another operation without stalling.  It is updated whenever
 * an instruction that writes register N is costed.
 */

static int interlocks[16];
static int interlock_base;

/* Record that register 'reg' becomes usable 'delay' cycles after the
 * current interlock_base.  Negative register numbers are ignored. */
static void
_interlock_def(int reg, int delay)
{
    if (reg < 0)
        return;

    interlocks[reg] = interlock_base + delay;
}

/* Return the number of cycles a use of register 'reg' has to wait at the
 * current interlock_base; 0 when the register is already available or
 * when 'reg' is negative. */
static int
_interlock_use(int reg)
{
    if (reg < 0)
        return 0;

    int remaining = interlocks[reg] - interlock_base;
    return (remaining > 0) ? remaining : 0;
}
479
trace_bb_start(uint32_t bb_addr)480 void trace_bb_start(uint32_t bb_addr)
481 {
482 int nn;
483
484 trace_static.bb_addr = bb_addr;
485 trace_static.is_thumb = 0;
486
487 interlock_base = 0;
488 for (nn = 0; nn < 16; nn++)
489 interlocks[nn] = 0;
490 }
491
trace_add_insn(uint32_t insn,int is_thumb)492 void trace_add_insn(uint32_t insn, int is_thumb)
493 {
494 trace_static.insns[trace_static.next_insn++] = insn;
495 // This relies on the fact that a basic block does not contain a mix
496 // of ARM and Thumb instructions. If that is not true, then many
497 // software tools that read the trace will have to change.
498 trace_static.is_thumb = is_thumb;
499 }
500
trace_bb_end()501 void trace_bb_end()
502 {
503 int ii, num_insns;
504 uint32_t insn;
505
506 uint64_t bb_num = hostToLE64(trace_static.bb_num);
507 // If these are Thumb instructions, then encode that fact by setting
508 // the low bit of the basic-block address to 1.
509 uint32_t bb_addr = trace_static.bb_addr | trace_static.is_thumb;
510 bb_addr = hostToLE32(bb_addr);
511 num_insns = hostToLE32(trace_static.next_insn);
512 fwrite(&bb_num, sizeof(bb_num), 1, trace_static.fstream);
513 fwrite(&bb_addr, sizeof(bb_addr), 1, trace_static.fstream);
514 fwrite(&num_insns, sizeof(num_insns), 1, trace_static.fstream);
515 for (ii = 0; ii < trace_static.next_insn; ++ii) {
516 insn = hostToLE32(trace_static.insns[ii]);
517 fwrite(&insn, sizeof(insn), 1, trace_static.fstream);
518 }
519
520 trace_static.bb_num += 1;
521 trace_static.next_insn = 0;
522 }
523
trace_cleanup()524 void trace_cleanup()
525 {
526 if (tracing) {
527 end_time = Now();
528 elapsed_usecs += end_time - start_time;
529 }
530 header.elapsed_usecs = elapsed_usecs;
531 double elapsed_secs = elapsed_usecs / 1000000.0;
532 double cycles_per_sec = 0;
533 if (elapsed_secs != 0)
534 cycles_per_sec = sim_time / elapsed_secs;
535 char *suffix = "";
536 if (cycles_per_sec >= 1000000) {
537 cycles_per_sec /= 1000000.0;
538 suffix = "M";
539 } else if (cycles_per_sec > 1000) {
540 cycles_per_sec /= 1000.0;
541 suffix = "K";
542 }
543 printf("Elapsed seconds: %.2f, simulated cycles/sec: %.1f%s\n",
544 elapsed_secs, cycles_per_sec, suffix);
545 if (trace_bb.fstream) {
546 BBRec *ptr;
547 BBRec *next = trace_bb.next;
548 char *comp_ptr = trace_bb.compressed_ptr;
549 int64_t prev_bb_num = trace_bb.prev_bb_num;
550 uint64_t prev_bb_time = trace_bb.prev_bb_time;
551 for (ptr = trace_bb.buffer; ptr != next; ++ptr) {
552 if (comp_ptr >= trace_bb.high_water_ptr) {
553 uint32_t size = comp_ptr - trace_bb.compressed;
554 fwrite(trace_bb.compressed, sizeof(char), size,
555 trace_bb.fstream);
556 comp_ptr = trace_bb.compressed;
557 }
558 int64_t bb_diff = ptr->bb_num - prev_bb_num;
559 prev_bb_num = ptr->bb_num;
560 uint64_t time_diff = ptr->start_time - prev_bb_time;
561 prev_bb_time = ptr->start_time;
562 comp_ptr = varint_encode_signed(bb_diff, comp_ptr);
563 comp_ptr = varint_encode(time_diff, comp_ptr);
564 comp_ptr = varint_encode(ptr->repeat, comp_ptr);
565 if (ptr->repeat)
566 comp_ptr = varint_encode(ptr->time_diff, comp_ptr);
567 }
568
569 // Add an extra record at the end containing the ending simulation
570 // time and a basic block number of 0.
571 uint64_t time_diff = sim_time - prev_bb_time;
572 if (time_diff > 0) {
573 int64_t bb_diff = -prev_bb_num;
574 comp_ptr = varint_encode_signed(bb_diff, comp_ptr);
575 comp_ptr = varint_encode(time_diff, comp_ptr);
576 comp_ptr = varint_encode(0, comp_ptr);
577 }
578
579 uint32_t size = comp_ptr - trace_bb.compressed;
580 if (size)
581 fwrite(trace_bb.compressed, sizeof(char), size, trace_bb.fstream);
582
583 // Terminate the file with three zeros so that we can detect
584 // the end of file quickly.
585 uint32_t zeros = 0;
586 fwrite(&zeros, 3, 1, trace_bb.fstream);
587 fclose(trace_bb.fstream);
588 }
589
590 if (trace_insn.fstream) {
591 InsnRec *ptr;
592 InsnRec *current = trace_insn.current + 1;
593 char *comp_ptr = trace_insn.compressed_ptr;
594 for (ptr = trace_insn.buffer; ptr != current; ++ptr) {
595 if (comp_ptr >= trace_insn.high_water_ptr) {
596 uint32_t size = comp_ptr - trace_insn.compressed;
597 uint32_t rval = fwrite(trace_insn.compressed, sizeof(char),
598 size, trace_insn.fstream);
599 if (rval != size) {
600 fprintf(stderr, "fwrite() failed\n");
601 perror(trace_insn.filename);
602 exit(1);
603 }
604 comp_ptr = trace_insn.compressed;
605 }
606 comp_ptr = varint_encode(ptr->time_diff, comp_ptr);
607 comp_ptr = varint_encode(ptr->repeat, comp_ptr);
608 }
609
610 uint32_t size = comp_ptr - trace_insn.compressed;
611 if (size) {
612 uint32_t rval = fwrite(trace_insn.compressed, sizeof(char), size,
613 trace_insn.fstream);
614 if (rval != size) {
615 fprintf(stderr, "fwrite() failed\n");
616 perror(trace_insn.filename);
617 exit(1);
618 }
619 }
620 fclose(trace_insn.fstream);
621 }
622
623 if (trace_static.fstream) {
624 fseek(trace_static.fstream, 0, SEEK_SET);
625 write_trace_header(&header);
626 fclose(trace_static.fstream);
627 }
628
629 if (trace_load.fstream) {
630 AddrRec *ptr;
631 char *comp_ptr = trace_load.compressed_ptr;
632 AddrRec *next = trace_load.next;
633 uint32_t prev_addr = trace_load.prev_addr;
634 uint64_t prev_time = trace_load.prev_time;
635 for (ptr = trace_load.buffer; ptr != next; ++ptr) {
636 if (comp_ptr >= trace_load.high_water_ptr) {
637 uint32_t size = comp_ptr - trace_load.compressed;
638 fwrite(trace_load.compressed, sizeof(char), size,
639 trace_load.fstream);
640 comp_ptr = trace_load.compressed;
641 }
642
643 int addr_diff = ptr->addr - prev_addr;
644 uint64_t time_diff = ptr->time - prev_time;
645 prev_addr = ptr->addr;
646 prev_time = ptr->time;
647
648 comp_ptr = varint_encode_signed(addr_diff, comp_ptr);
649 comp_ptr = varint_encode(time_diff, comp_ptr);
650 }
651
652 uint32_t size = comp_ptr - trace_load.compressed;
653 if (size) {
654 fwrite(trace_load.compressed, sizeof(char), size,
655 trace_load.fstream);
656 }
657
658 // Terminate the file with two zeros so that we can detect
659 // the end of file quickly.
660 uint32_t zeros = 0;
661 fwrite(&zeros, 2, 1, trace_load.fstream);
662 fclose(trace_load.fstream);
663 }
664
665 if (trace_store.fstream) {
666 AddrRec *ptr;
667 char *comp_ptr = trace_store.compressed_ptr;
668 AddrRec *next = trace_store.next;
669 uint32_t prev_addr = trace_store.prev_addr;
670 uint64_t prev_time = trace_store.prev_time;
671 for (ptr = trace_store.buffer; ptr != next; ++ptr) {
672 if (comp_ptr >= trace_store.high_water_ptr) {
673 uint32_t size = comp_ptr - trace_store.compressed;
674 fwrite(trace_store.compressed, sizeof(char), size,
675 trace_store.fstream);
676 comp_ptr = trace_store.compressed;
677 }
678
679 int addr_diff = ptr->addr - prev_addr;
680 uint64_t time_diff = ptr->time - prev_time;
681 prev_addr = ptr->addr;
682 prev_time = ptr->time;
683
684 comp_ptr = varint_encode_signed(addr_diff, comp_ptr);
685 comp_ptr = varint_encode(time_diff, comp_ptr);
686 }
687
688 uint32_t size = comp_ptr - trace_store.compressed;
689 if (size) {
690 fwrite(trace_store.compressed, sizeof(char), size,
691 trace_store.fstream);
692 }
693
694 // Terminate the file with two zeros so that we can detect
695 // the end of file quickly.
696 uint32_t zeros = 0;
697 fwrite(&zeros, 2, 1, trace_store.fstream);
698 fclose(trace_store.fstream);
699 }
700
701 if (trace_exc.fstream) {
702 uint32_t size = trace_exc.compressed_ptr - trace_exc.compressed;
703 if (size) {
704 fwrite(trace_exc.compressed, sizeof(char), size,
705 trace_exc.fstream);
706 }
707
708 // Terminate the file with 7 zeros so that we can detect
709 // the end of file quickly.
710 uint64_t zeros = 0;
711 fwrite(&zeros, 7, 1, trace_exc.fstream);
712 fclose(trace_exc.fstream);
713 }
714 if (trace_pid.fstream) {
715 uint32_t size = trace_pid.compressed_ptr - trace_pid.compressed;
716 if (size) {
717 fwrite(trace_pid.compressed, sizeof(char), size,
718 trace_pid.fstream);
719 }
720
721 // Terminate the file with 2 zeros so that we can detect
722 // the end of file quickly.
723 uint64_t zeros = 0;
724 fwrite(&zeros, 2, 1, trace_pid.fstream);
725 fclose(trace_pid.fstream);
726 }
727 if (trace_method.fstream) {
728 uint32_t size = trace_method.compressed_ptr - trace_method.compressed;
729 if (size) {
730 fwrite(trace_method.compressed, sizeof(char), size,
731 trace_method.fstream);
732 }
733
734 // Terminate the file with 2 zeros so that we can detect
735 // the end of file quickly.
736 uint64_t zeros = 0;
737 fwrite(&zeros, 2, 1, trace_method.fstream);
738 fclose(trace_method.fstream);
739 }
740 if (ftrace_debug)
741 fclose(ftrace_debug);
742 }
743
// Fixed clock tick counts for some instructions.  In some cases one more
// tick should be added when there is an interlock; these constants do not
// account for interlocks.  The flag-setting ("S") forms carry no interlock
// penalty.
#define TICKS_OTHER     1

// Signed halfword multiplies
#define TICKS_SMULxy    1
#define TICKS_SMLAWy    1
#define TICKS_SMLALxy   2

// 32-bit multiplies
#define TICKS_MUL       2
#define TICKS_MLA       2
#define TICKS_MULS      4   // no interlock penalty
#define TICKS_MLAS      4   // no interlock penalty

// 64-bit multiplies
#define TICKS_UMULL     3
#define TICKS_UMLAL     3
#define TICKS_SMULL     3
#define TICKS_SMLAL     3
#define TICKS_UMULLS    5   // no interlock penalty
#define TICKS_UMLALS    5   // no interlock penalty
#define TICKS_SMULLS    5   // no interlock penalty
#define TICKS_SMLALS    5   // no interlock penalty
763
764 // Compute the number of cycles that this instruction will take,
765 // not including any I-cache or D-cache misses. This function
766 // is called for each instruction in a basic block when that
767 // block is being translated.
get_insn_ticks_arm(uint32_t insn)768 int get_insn_ticks_arm(uint32_t insn)
769 {
770 #if 1
771 int result = 1; /* by default, use 1 cycle */
772
773 /* See Chapter 12 of the ARM920T Reference Manual for details about clock cycles */
774
775 /* first check for invalid condition codes */
776 if ((insn >> 28) == 0xf)
777 {
778 if ((insn >> 25) == 0x7d) { /* BLX */
779 result = 3;
780 goto Exit;
781 }
782 /* XXX: if we get there, we're either in an UNDEFINED instruction */
783 /* or in co-processor related ones. For now, only return 1 cycle */
784 goto Exit;
785 }
786
787 /* other cases */
788 switch ((insn >> 25) & 7)
789 {
790 case 0:
791 if ((insn & 0x00000090) == 0x00000090) /* Multiplies, extra load/store, Table 3-2 */
792 {
793 /* XXX: TODO: Add support for multiplier operand content penalties in the translator */
794
795 if ((insn & 0x0fc000f0) == 0x00000090) /* 3-2: Multiply (accumulate) */
796 {
797 int Rm = (insn & 15);
798 int Rs = (insn >> 8) & 15;
799 int Rn = (insn >> 12) & 15;
800
801 if ((insn & 0x00200000) != 0) { /* MLA */
802 result += _interlock_use(Rn);
803 } else { /* MLU */
804 if (Rn != 0) /* UNDEFINED */
805 goto Exit;
806 }
807 /* cycles=2+m, assume m=1, this should be adjusted at interpretation time */
808 result += 2 + _interlock_use(Rm) + _interlock_use(Rs);
809 }
810 else if ((insn & 0x0f8000f0) == 0x00800090) /* 3-2: Multiply (accumulate) long */
811 {
812 int Rm = (insn & 15);
813 int Rs = (insn >> 8) & 15;
814 int RdLo = (insn >> 12) & 15;
815 int RdHi = (insn >> 16) & 15;
816
817 if ((insn & 0x00200000) != 0) { /* SMLAL & UMLAL */
818 result += _interlock_use(RdLo) + _interlock_use(RdHi);
819 }
820 /* else SMLL and UMLL */
821
822 /* cucles=3+m, assume m=1, this should be adjusted at interpretation time */
823 result += 3 + _interlock_use(Rm) + _interlock_use(Rs);
824 }
825 else if ((insn & 0x0fd00ff0) == 0x01000090) /* 3-2: Swap/swap byte */
826 {
827 int Rm = (insn & 15);
828 int Rd = (insn >> 8) & 15;
829
830 result = 2 + _interlock_use(Rm);
831 _interlock_def(Rd, result+1);
832 }
833 else if ((insn & 0x0e400ff0) == 0x00000090) /* 3-2: load/store halfword, reg offset */
834 {
835 int Rm = (insn & 15);
836 int Rd = (insn >> 12) & 15;
837 int Rn = (insn >> 16) & 15;
838
839 result += _interlock_use(Rn) + _interlock_use(Rm);
840 if ((insn & 0x00100000) != 0) /* it's a load, there's a 2-cycle interlock */
841 _interlock_def(Rd, result+2);
842 }
843 else if ((insn & 0x0e400ff0) == 0x00400090) /* 3-2: load/store halfword, imm offset */
844 {
845 int Rd = (insn >> 12) & 15;
846 int Rn = (insn >> 16) & 15;
847
848 result += _interlock_use(Rn);
849 if ((insn & 0x00100000) != 0) /* it's a load, there's a 2-cycle interlock */
850 _interlock_def(Rd, result+2);
851 }
852 else if ((insn & 0x0e500fd0) == 0x000000d0) /* 3-2: load/store two words, reg offset */
853 {
854 /* XXX: TODO: Enhanced DSP instructions */
855 }
856 else if ((insn & 0x0e500fd0) == 0x001000d0) /* 3-2: load/store half/byte, reg offset */
857 {
858 int Rm = (insn & 15);
859 int Rd = (insn >> 12) & 15;
860 int Rn = (insn >> 16) & 15;
861
862 result += _interlock_use(Rn) + _interlock_use(Rm);
863 if ((insn & 0x00100000) != 0) /* load, 2-cycle interlock */
864 _interlock_def(Rd, result+2);
865 }
866 else if ((insn & 0x0e5000d0) == 0x004000d0) /* 3-2: load/store two words, imm offset */
867 {
868 /* XXX: TODO: Enhanced DSP instructions */
869 }
870 else if ((insn & 0x0e5000d0) == 0x005000d0) /* 3-2: load/store half/byte, imm offset */
871 {
872 int Rd = (insn >> 12) & 15;
873 int Rn = (insn >> 16) & 15;
874
875 result += _interlock_use(Rn);
876 if ((insn & 0x00100000) != 0) /* load, 2-cycle interlock */
877 _interlock_def(Rd, result+2);
878 }
879 else
880 {
881 /* UNDEFINED */
882 }
883 }
884 else if ((insn & 0x0f900000) == 0x01000000) /* Misc. instructions, table 3-3 */
885 {
886 switch ((insn >> 4) & 15)
887 {
888 case 0:
889 if ((insn & 0x0fb0fff0) == 0x0120f000) /* move register to status register */
890 {
891 int Rm = (insn & 15);
892 result += _interlock_use(Rm);
893 }
894 break;
895
896 case 1:
897 if ( ((insn & 0x0ffffff0) == 0x01200010) || /* branch/exchange */
898 ((insn & 0x0fff0ff0) == 0x01600010) ) /* count leading zeroes */
899 {
900 int Rm = (insn & 15);
901 result += _interlock_use(Rm);
902 }
903 break;
904
905 case 3:
906 if ((insn & 0x0ffffff0) == 0x01200030) /* link/exchange */
907 {
908 int Rm = (insn & 15);
909 result += _interlock_use(Rm);
910 }
911 break;
912
913 default:
914 /* TODO: Enhanced DSP instructions */
915 ;
916 }
917 }
918 else /* Data processing */
919 {
920 int Rm = (insn & 15);
921 int Rn = (insn >> 16) & 15;
922
923 result += _interlock_use(Rn) + _interlock_use(Rm);
924 if ((insn & 0x10)) { /* register-controlled shift => 1 cycle penalty */
925 int Rs = (insn >> 8) & 15;
926 result += 1 + _interlock_use(Rs);
927 }
928 }
929 break;
930
931 case 1:
932 if ((insn & 0x01900000) == 0x01900000)
933 {
934 /* either UNDEFINED or move immediate to CPSR */
935 }
936 else /* Data processing immediate */
937 {
938 int Rn = (insn >> 12) & 15;
939 result += _interlock_use(Rn);
940 }
941 break;
942
943 case 2: /* load/store immediate */
944 {
945 int Rn = (insn >> 16) & 15;
946
947 result += _interlock_use(Rn);
948 if (insn & 0x00100000) { /* LDR */
949 int Rd = (insn >> 12) & 15;
950
951 if (Rd == 15) /* loading PC */
952 result = 5;
953 else
954 _interlock_def(Rd,result+1);
955 }
956 }
957 break;
958
959 case 3:
960 if ((insn & 0x10) == 0) /* load/store register offset */
961 {
962 int Rm = (insn & 15);
963 int Rn = (insn >> 16) & 15;
964
965 result += _interlock_use(Rm) + _interlock_use(Rn);
966
967 if (insn & 0x00100000) { /* LDR */
968 int Rd = (insn >> 12) & 15;
969 if (Rd == 15)
970 result = 5;
971 else
972 _interlock_def(Rd,result+1);
973 }
974 }
975 /* else UNDEFINED */
976 break;
977
978 case 4: /* load/store multiple */
979 {
980 int Rn = (insn >> 16) & 15;
981 uint32_t mask = (insn & 0xffff);
982 int count;
983
984 for (count = 0; mask; count++)
985 mask &= (mask-1);
986
987 result += _interlock_use(Rn);
988
989 if (insn & 0x00100000) /* LDM */
990 {
991 int nn;
992
993 if (insn & 0x8000) { /* loading PC */
994 result = count+4;
995 } else { /* not loading PC */
996 result = (count < 2) ? 2 : count;
997 }
998 /* create defs, all registers locked until the end of the load */
999 for (nn = 0; nn < 15; nn++)
1000 if ((insn & (1U << nn)) != 0)
1001 _interlock_def(nn,result);
1002 }
1003 else /* STM */
1004 result = (count < 2) ? 2 : count;
1005 }
1006 break;
1007
1008 case 5: /* branch and branch+link */
1009 break;
1010
1011 case 6: /* coprocessor load/store */
1012 {
1013 int Rn = (insn >> 16) & 15;
1014
1015 if (insn & 0x00100000)
1016 result += _interlock_use(Rn);
1017
1018 /* XXX: other things to do ? */
1019 }
1020 break;
1021
1022 default: /* i.e. 7 */
1023 /* XXX: TODO: co-processor related things */
1024 ;
1025 }
1026 Exit:
1027 interlock_base += result;
1028 return result;
1029 #else /* old code - this seems to be completely buggy ?? */
1030 if ((insn & 0x0ff0f090) == 0x01600080) {
1031 return TICKS_SMULxy;
1032 } else if ((insn & 0x0ff00090) == 0x01200080) {
1033 return TICKS_SMLAWy;
1034 } else if ((insn & 0x0ff00090) == 0x01400080) {
1035 return TICKS_SMLALxy;
1036 } else if ((insn & 0x0f0000f0) == 0x00000090) {
1037 // multiply
1038 uint8_t bit23 = (insn >> 23) & 0x1;
1039 uint8_t bit22_U = (insn >> 22) & 0x1;
1040 uint8_t bit21_A = (insn >> 21) & 0x1;
1041 uint8_t bit20_S = (insn >> 20) & 0x1;
1042
1043 if (bit23 == 0) {
1044 // 32-bit multiply
1045 if (bit22_U != 0) {
1046 // This is an unexpected bit pattern.
1047 return TICKS_OTHER;
1048 }
1049 if (bit21_A == 0) {
1050 if (bit20_S)
1051 return TICKS_MULS;
1052 return TICKS_MUL;
1053 }
1054 if (bit20_S)
1055 return TICKS_MLAS;
1056 return TICKS_MLA;
1057 }
1058 // 64-bit multiply
1059 if (bit22_U == 0) {
1060 // Unsigned multiply long
1061 if (bit21_A == 0) {
1062 if (bit20_S)
1063 return TICKS_UMULLS;
1064 return TICKS_UMULL;
1065 }
1066 if (bit20_S)
1067 return TICKS_UMLALS;
1068 return TICKS_UMLAL;
1069 }
1070 // Signed multiply long
1071 if (bit21_A == 0) {
1072 if (bit20_S)
1073 return TICKS_SMULLS;
1074 return TICKS_SMULL;
1075 }
1076 if (bit20_S)
1077 return TICKS_SMLALS;
1078 return TICKS_SMLAL;
1079 }
1080 return TICKS_OTHER;
1081 #endif
1082 }
1083
get_insn_ticks_thumb(uint32_t insn)1084 int get_insn_ticks_thumb(uint32_t insn)
1085 {
1086 #if 1
1087 int result = 1;
1088
1089 switch ((insn >> 11) & 31)
1090 {
1091 case 0:
1092 case 1:
1093 case 2: /* Shift by immediate */
1094 {
1095 int Rm = (insn >> 3) & 7;
1096 result += _interlock_use(Rm);
1097 }
1098 break;
1099
1100 case 3: /* Add/Substract */
1101 {
1102 int Rn = (insn >> 3) & 7;
1103 result += _interlock_use(Rn);
1104
1105 if ((insn & 0x0400) == 0) { /* register value */
1106 int Rm = (insn >> 6) & 7;
1107 result += _interlock_use(Rm);
1108 }
1109 }
1110 break;
1111
1112 case 4: /* move immediate */
1113 break;
1114
1115 case 5:
1116 case 6:
1117 case 7: /* add/substract/compare immediate */
1118 {
1119 int Rd = (insn >> 8) & 7;
1120 result += _interlock_use(Rd);
1121 }
1122 break;
1123
1124 case 8:
1125 {
1126 if ((insn & 0x0400) == 0) /* data processing register */
1127 {
1128 /* the registers can also be Rs and Rn in some cases */
1129 /* but they're always read anyway and located at the */
1130 /* same place, so we don't check the opcode */
1131 int Rm = (insn >> 3) & 7;
1132 int Rd = (insn >> 3) & 7;
1133
1134 result += _interlock_use(Rm) + _interlock_use(Rd);
1135 }
1136 else switch ((insn >> 8) & 3)
1137 {
1138 case 0:
1139 case 1:
1140 case 2: /* special data processing */
1141 {
1142 int Rn = (insn & 7) | ((insn >> 4) & 0x8);
1143 int Rm = ((insn >> 3) & 15);
1144
1145 result += _interlock_use(Rn) + _interlock_use(Rm);
1146 }
1147 break;
1148
1149 case 3:
1150 if ((insn & 0xff07) == 0x4700) /* branch/exchange */
1151 {
1152 int Rm = (insn >> 3) & 15;
1153
1154 result = 3 + _interlock_use(Rm);
1155 }
1156 /* else UNDEFINED */
1157 break;
1158 }
1159 }
1160 break;
1161
1162 case 9: /* load from literal pool */
1163 {
1164 int Rd = (insn >> 8) & 7;
1165 _interlock_def(Rd,result+1);
1166 }
1167 break;
1168
1169 case 10:
1170 case 11: /* load/store register offset */
1171 {
1172 int Rd = (insn & 7);
1173 int Rn = (insn >> 3) & 7;
1174 int Rm = (insn >> 6) & 7;
1175
1176 result += _interlock_use(Rn) + _interlock_use(Rm);
1177
1178 switch ((insn >> 9) & 7)
1179 {
1180 case 0: /* STR */
1181 case 1: /* STRH */
1182 case 2: /* STRB */
1183 result += _interlock_use(Rd);
1184 break;
1185
1186 case 3: /* LDRSB */
1187 case 5: /* LDRH */
1188 case 6: /* LDRB */
1189 case 7: /* LDRSH */
1190 _interlock_def(Rd,result+2);
1191 break;
1192
1193 case 4: /* LDR */
1194 _interlock_def(Rd,result+1);
1195 }
1196 }
1197 break;
1198
1199 case 12: /* store word immediate offset */
1200 case 14: /* store byte immediate offset */
1201 {
1202 int Rd = (insn & 7);
1203 int Rn = (insn >> 3) & 7;
1204
1205 result += _interlock_use(Rd) + _interlock_use(Rn);
1206 }
1207 break;
1208
1209 case 13: /* load word immediate offset */
1210 {
1211 int Rd = (insn & 7);
1212 int Rn = (insn >> 3) & 7;
1213
1214 result += _interlock_use(Rn);
1215 _interlock_def(Rd,result+1);
1216 }
1217 break;
1218
1219 case 15: /* load byte immediate offset */
1220 {
1221 int Rd = (insn & 7);
1222 int Rn = (insn >> 3) & 7;
1223
1224 result += _interlock_use(Rn);
1225 _interlock_def(Rd,result+2);
1226 }
1227 break;
1228
1229 case 16: /* store halfword immediate offset */
1230 {
1231 int Rd = (insn & 7);
1232 int Rn = (insn >> 3) & 7;
1233
1234 result += _interlock_use(Rn) + _interlock_use(Rd);
1235 }
1236 break;
1237
1238 case 17: /* load halfword immediate offset */
1239 {
1240 int Rd = (insn & 7);
1241 int Rn = (insn >> 3) & 7;
1242
1243 result += _interlock_use(Rn);
1244 _interlock_def(Rd,result+2);
1245 }
1246 break;
1247
1248 case 18: /* store to stack */
1249 {
1250 int Rd = (insn >> 8) & 3;
1251 result += _interlock_use(Rd);
1252 }
1253 break;
1254
1255 case 19: /* load from stack */
1256 {
1257 int Rd = (insn >> 8) & 3;
1258 _interlock_def(Rd,result+1);
1259 }
1260 break;
1261
1262 case 20: /* add to PC */
1263 case 21: /* add to SP */
1264 {
1265 int Rd = (insn >> 8) & 3;
1266 result += _interlock_use(Rd);
1267 }
1268 break;
1269
1270 case 22:
1271 case 23: /* misc. instructions, table 6-2 */
1272 {
1273 if ((insn & 0xff00) == 0xb000) /* adjust stack pointer */
1274 {
1275 result += _interlock_use(14);
1276 }
1277 else if ((insn & 0x0600) == 0x0400) /* push pop register list */
1278 {
1279 uint32_t mask = insn & 0x01ff;
1280 int count, nn;
1281
1282 for (count = 0; mask; count++)
1283 mask &= (mask-1);
1284
1285 result = (count < 2) ? 2 : count;
1286
1287 if (insn & 0x0800) /* pop register list */
1288 {
1289 for (nn = 0; nn < 9; nn++)
1290 if (insn & (1 << nn))
1291 _interlock_def(nn, result);
1292 }
1293 else /* push register list */
1294 {
1295 for (nn = 0; nn < 9; nn++)
1296 if (insn & (1 << nn))
1297 result += _interlock_use(nn);
1298 }
1299 }
1300 /* else software breakpoint */
1301 }
1302 break;
1303
1304 case 24: /* store multiple */
1305 {
1306 int Rd = (insn >> 8) & 7;
1307 uint32_t mask = insn & 255;
1308 int count, nn;
1309
1310 for (count = 0; mask; count++)
1311 mask &= (mask-1);
1312
1313 result = (count < 2) ? 2 : count;
1314 result += _interlock_use(Rd);
1315
1316 for (nn = 0; nn < 8; nn++)
1317 if (insn & (1 << nn))
1318 result += _interlock_use(nn);
1319 }
1320 break;
1321
1322 case 25: /* load multiple */
1323 {
1324 int Rd = (insn >> 8) & 7;
1325 uint32_t mask = insn & 255;
1326 int count, nn;
1327
1328 for (count = 0; mask; count++)
1329 mask &= (mask-1);
1330
1331 result = (count < 2) ? 2 : count;
1332 result += _interlock_use(Rd);
1333
1334 for (nn = 0; nn < 8; nn++)
1335 if (insn & (1 << nn))
1336 _interlock_def(nn, result);
1337 }
1338 break;
1339
1340 case 26:
1341 case 27: /* conditional branch / undefined / software interrupt */
1342 switch ((insn >> 8) & 15)
1343 {
1344 case 14: /* UNDEFINED */
1345 case 15: /* SWI */
1346 break;
1347
1348 default: /* conditional branch */
1349 result = 3;
1350 }
1351 break;
1352
1353 case 28: /* unconditional branch */
1354 result = 3;
1355 break;
1356
1357 case 29: /* BLX suffix or undefined */
1358 if ((insn & 1) == 0)
1359 result = 3;
1360 break;
1361
1362 case 30: /* BLX/BLX prefix */
1363 break;
1364
1365 case 31: /* BL suffix */
1366 result = 3;
1367 break;
1368 }
1369 interlock_base += result;
1370 return result;
1371 #else /* old code */
1372 if ((insn & 0xfc00) == 0x4340) /* MUL */
1373 return TICKS_SMULxy;
1374
1375 return TICKS_OTHER;
1376 #endif
1377 }
1378
1379 // Adds an exception trace record.
trace_exception(uint32 target_pc)1380 void trace_exception(uint32 target_pc)
1381 {
1382 if (trace_exc.fstream == NULL)
1383 return;
1384
1385 // Sometimes we get an unexpected exception as the first record. If the
1386 // basic block number is zero, then we know it is bogus.
1387 if (trace_bb.current_bb_num == 0)
1388 return;
1389
1390 uint32_t current_pc = trace_bb.current_bb_addr + 4 * (trace_bb.num_insns - 1);
1391 #if 0
1392 if (ftrace_debug) {
1393 fprintf(ftrace_debug, "t%llu exc pc: 0x%x bb_addr: 0x%x num_insns: %d current_pc: 0x%x bb_num %llu bb_start_time %llu\n",
1394 sim_time, target_pc, trace_bb.current_bb_addr,
1395 trace_bb.num_insns, current_pc, trace_bb.current_bb_num,
1396 trace_bb.current_bb_start_time);
1397 }
1398 #endif
1399 char *comp_ptr = trace_exc.compressed_ptr;
1400 if (comp_ptr >= trace_exc.high_water_ptr) {
1401 uint32_t size = comp_ptr - trace_exc.compressed;
1402 fwrite(trace_exc.compressed, sizeof(char), size, trace_exc.fstream);
1403 comp_ptr = trace_exc.compressed;
1404 }
1405 uint64_t time_diff = sim_time - trace_exc.prev_time;
1406 trace_exc.prev_time = sim_time;
1407 uint64_t bb_recnum_diff = trace_bb.recnum - trace_exc.prev_bb_recnum;
1408 trace_exc.prev_bb_recnum = trace_bb.recnum;
1409 comp_ptr = varint_encode(time_diff, comp_ptr);
1410 comp_ptr = varint_encode(current_pc, comp_ptr);
1411 comp_ptr = varint_encode(bb_recnum_diff, comp_ptr);
1412 comp_ptr = varint_encode(target_pc, comp_ptr);
1413 comp_ptr = varint_encode(trace_bb.current_bb_num, comp_ptr);
1414 comp_ptr = varint_encode(trace_bb.current_bb_start_time, comp_ptr);
1415 comp_ptr = varint_encode(trace_bb.num_insns, comp_ptr);
1416 trace_exc.compressed_ptr = comp_ptr;
1417 }
1418
trace_pid_1arg(int pid,int rec_type)1419 void trace_pid_1arg(int pid, int rec_type)
1420 {
1421 if (trace_pid.fstream == NULL)
1422 return;
1423 char *comp_ptr = trace_pid.compressed_ptr;
1424 char *max_end_ptr = comp_ptr + kMaxPidCompressed;
1425 if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1426 uint32_t size = comp_ptr - trace_pid.compressed;
1427 fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1428 comp_ptr = trace_pid.compressed;
1429 }
1430 uint64_t time_diff = sim_time - trace_pid.prev_time;
1431 trace_pid.prev_time = sim_time;
1432 comp_ptr = varint_encode(time_diff, comp_ptr);
1433 comp_ptr = varint_encode(rec_type, comp_ptr);
1434 comp_ptr = varint_encode(pid, comp_ptr);
1435 trace_pid.compressed_ptr = comp_ptr;
1436 }
1437
trace_pid_2arg(int tgid,int pid,int rec_type)1438 void trace_pid_2arg(int tgid, int pid, int rec_type)
1439 {
1440 if (trace_pid.fstream == NULL)
1441 return;
1442 char *comp_ptr = trace_pid.compressed_ptr;
1443 char *max_end_ptr = comp_ptr + kMaxPid2Compressed;
1444 if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1445 uint32_t size = comp_ptr - trace_pid.compressed;
1446 fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1447 comp_ptr = trace_pid.compressed;
1448 }
1449 uint64_t time_diff = sim_time - trace_pid.prev_time;
1450 trace_pid.prev_time = sim_time;
1451 comp_ptr = varint_encode(time_diff, comp_ptr);
1452 comp_ptr = varint_encode(rec_type, comp_ptr);
1453 comp_ptr = varint_encode(tgid, comp_ptr);
1454 comp_ptr = varint_encode(pid, comp_ptr);
1455 trace_pid.compressed_ptr = comp_ptr;
1456 }
1457
trace_switch(int pid)1458 void trace_switch(int pid)
1459 {
1460 #if 0
1461 if (ftrace_debug && trace_pid.fstream)
1462 fprintf(ftrace_debug, "t%lld switch %d\n", sim_time, pid);
1463 #endif
1464 trace_pid_1arg(pid, kPidSwitch);
1465 current_pid = pid;
1466 }
1467
trace_fork(int tgid,int pid)1468 void trace_fork(int tgid, int pid)
1469 {
1470 #if 0
1471 if (ftrace_debug && trace_pid.fstream)
1472 fprintf(ftrace_debug, "t%lld fork %d\n", sim_time, pid);
1473 #endif
1474 trace_pid_2arg(tgid, pid, kPidFork);
1475 }
1476
trace_clone(int tgid,int pid)1477 void trace_clone(int tgid, int pid)
1478 {
1479 #if 0
1480 if (ftrace_debug && trace_pid.fstream)
1481 fprintf(ftrace_debug, "t%lld clone %d\n", sim_time, pid);
1482 #endif
1483 trace_pid_2arg(tgid, pid, kPidClone);
1484 }
1485
trace_exit(int exitcode)1486 void trace_exit(int exitcode)
1487 {
1488 #if 0
1489 if (ftrace_debug && trace_pid.fstream)
1490 fprintf(ftrace_debug, "t%lld exit %d\n", sim_time, exitcode);
1491 #endif
1492 trace_pid_1arg(exitcode, kPidExit);
1493 }
1494
trace_name(char * name)1495 void trace_name(char *name)
1496 {
1497 #if 0
1498 if (ftrace_debug && trace_pid.fstream) {
1499 fprintf(ftrace_debug, "t%lld pid %d name %s\n",
1500 sim_time, current_pid, name);
1501 }
1502 #endif
1503 if (trace_pid.fstream == NULL)
1504 return;
1505 int len = strlen(name);
1506 char *comp_ptr = trace_pid.compressed_ptr;
1507 char *max_end_ptr = comp_ptr + len + kMaxNameCompressed;
1508 if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1509 uint32_t size = comp_ptr - trace_pid.compressed;
1510 fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1511 comp_ptr = trace_pid.compressed;
1512 }
1513 uint64_t time_diff = sim_time - trace_pid.prev_time;
1514 trace_pid.prev_time = sim_time;
1515 comp_ptr = varint_encode(time_diff, comp_ptr);
1516 int rec_type = kPidName;
1517 comp_ptr = varint_encode(rec_type, comp_ptr);
1518 comp_ptr = varint_encode(current_pid, comp_ptr);
1519 comp_ptr = varint_encode(len, comp_ptr);
1520 strncpy(comp_ptr, name, len);
1521 comp_ptr += len;
1522 trace_pid.compressed_ptr = comp_ptr;
1523 }
1524
trace_execve(const char * argv,int len)1525 void trace_execve(const char *argv, int len)
1526 {
1527 int ii;
1528
1529 if (trace_pid.fstream == NULL)
1530 return;
1531 // Count the number of args
1532 int alen = 0;
1533 int sum_len = 0;
1534 int argc = 0;
1535 const char *ptr = argv;
1536 while (sum_len < len) {
1537 argc += 1;
1538 alen = strlen(ptr);
1539 ptr += alen + 1;
1540 sum_len += alen + 1;
1541 }
1542
1543 #if 0
1544 if (ftrace_debug) {
1545 fprintf(ftrace_debug, "t%lld argc: %d\n", sim_time, argc);
1546 alen = 0;
1547 ptr = argv;
1548 for (ii = 0; ii < argc; ++ii) {
1549 fprintf(ftrace_debug, " argv[%d]: %s\n", ii, ptr);
1550 alen = strlen(ptr);
1551 ptr += alen + 1;
1552 }
1553 }
1554 #endif
1555
1556 char *comp_ptr = trace_pid.compressed_ptr;
1557 char *max_end_ptr = comp_ptr + len + 5 * argc + kMaxExecArgsCompressed;
1558 if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1559 uint32_t size = comp_ptr - trace_pid.compressed;
1560 fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1561 comp_ptr = trace_pid.compressed;
1562 }
1563 uint64_t time_diff = sim_time - trace_pid.prev_time;
1564 trace_pid.prev_time = sim_time;
1565 comp_ptr = varint_encode(time_diff, comp_ptr);
1566 int rec_type = kPidExec;
1567 comp_ptr = varint_encode(rec_type, comp_ptr);
1568 comp_ptr = varint_encode(argc, comp_ptr);
1569
1570 ptr = argv;
1571 for (ii = 0; ii < argc; ++ii) {
1572 alen = strlen(ptr);
1573 comp_ptr = varint_encode(alen, comp_ptr);
1574 strncpy(comp_ptr, ptr, alen);
1575 comp_ptr += alen;
1576 ptr += alen + 1;
1577 }
1578 trace_pid.compressed_ptr = comp_ptr;
1579 }
1580
trace_mmap(unsigned long vstart,unsigned long vend,unsigned long offset,const char * path)1581 void trace_mmap(unsigned long vstart, unsigned long vend,
1582 unsigned long offset, const char *path)
1583 {
1584 if (trace_pid.fstream == NULL)
1585 return;
1586 #if 0
1587 if (ftrace_debug)
1588 fprintf(ftrace_debug, "t%lld mmap %08lx - %08lx, offset %08lx '%s'\n",
1589 sim_time, vstart, vend, offset, path);
1590 #endif
1591 int len = strlen(path);
1592 char *comp_ptr = trace_pid.compressed_ptr;
1593 char *max_end_ptr = comp_ptr + len + kMaxMmapCompressed;
1594 if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1595 uint32_t size = comp_ptr - trace_pid.compressed;
1596 fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1597 comp_ptr = trace_pid.compressed;
1598 }
1599 uint64_t time_diff = sim_time - trace_pid.prev_time;
1600 trace_pid.prev_time = sim_time;
1601 comp_ptr = varint_encode(time_diff, comp_ptr);
1602 int rec_type = kPidMmap;
1603 comp_ptr = varint_encode(rec_type, comp_ptr);
1604 comp_ptr = varint_encode(vstart, comp_ptr);
1605 comp_ptr = varint_encode(vend, comp_ptr);
1606 comp_ptr = varint_encode(offset, comp_ptr);
1607 comp_ptr = varint_encode(len, comp_ptr);
1608 strncpy(comp_ptr, path, len);
1609 trace_pid.compressed_ptr = comp_ptr + len;
1610 }
1611
trace_munmap(unsigned long vstart,unsigned long vend)1612 void trace_munmap(unsigned long vstart, unsigned long vend)
1613 {
1614 if (trace_pid.fstream == NULL)
1615 return;
1616 #if 0
1617 if (ftrace_debug)
1618 fprintf(ftrace_debug, "t%lld munmap %08lx - %08lx\n",
1619 sim_time, vstart, vend);
1620 #endif
1621 char *comp_ptr = trace_pid.compressed_ptr;
1622 char *max_end_ptr = comp_ptr + kMaxMunmapCompressed;
1623 if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1624 uint32_t size = comp_ptr - trace_pid.compressed;
1625 fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1626 comp_ptr = trace_pid.compressed;
1627 }
1628 uint64_t time_diff = sim_time - trace_pid.prev_time;
1629 trace_pid.prev_time = sim_time;
1630 comp_ptr = varint_encode(time_diff, comp_ptr);
1631 int rec_type = kPidMunmap;
1632 comp_ptr = varint_encode(rec_type, comp_ptr);
1633 comp_ptr = varint_encode(vstart, comp_ptr);
1634 comp_ptr = varint_encode(vend, comp_ptr);
1635 trace_pid.compressed_ptr = comp_ptr;
1636 }
1637
trace_dynamic_symbol_add(unsigned long vaddr,const char * name)1638 void trace_dynamic_symbol_add(unsigned long vaddr, const char *name)
1639 {
1640 if (trace_pid.fstream == NULL)
1641 return;
1642 #if 0
1643 if (ftrace_debug)
1644 fprintf(ftrace_debug, "t%lld sym %08lx '%s'\n", sim_time, vaddr, name);
1645 #endif
1646 int len = strlen(name);
1647 char *comp_ptr = trace_pid.compressed_ptr;
1648 char *max_end_ptr = comp_ptr + len + kMaxSymbolCompressed;
1649 if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1650 uint32_t size = comp_ptr - trace_pid.compressed;
1651 fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1652 comp_ptr = trace_pid.compressed;
1653 }
1654 uint64_t time_diff = sim_time - trace_pid.prev_time;
1655 trace_pid.prev_time = sim_time;
1656 comp_ptr = varint_encode(time_diff, comp_ptr);
1657 int rec_type = kPidSymbolAdd;
1658 comp_ptr = varint_encode(rec_type, comp_ptr);
1659 comp_ptr = varint_encode(vaddr, comp_ptr);
1660 comp_ptr = varint_encode(len, comp_ptr);
1661 strncpy(comp_ptr, name, len);
1662 trace_pid.compressed_ptr = comp_ptr + len;
1663 }
1664
trace_dynamic_symbol_remove(unsigned long vaddr)1665 void trace_dynamic_symbol_remove(unsigned long vaddr)
1666 {
1667 if (trace_pid.fstream == NULL)
1668 return;
1669 #if 0
1670 if (ftrace_debug)
1671 fprintf(ftrace_debug, "t%lld remove %08lx\n", sim_time, vaddr);
1672 #endif
1673 char *comp_ptr = trace_pid.compressed_ptr;
1674 char *max_end_ptr = comp_ptr + kMaxSymbolCompressed;
1675 if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1676 uint32_t size = comp_ptr - trace_pid.compressed;
1677 fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1678 comp_ptr = trace_pid.compressed;
1679 }
1680 uint64_t time_diff = sim_time - trace_pid.prev_time;
1681 trace_pid.prev_time = sim_time;
1682 comp_ptr = varint_encode(time_diff, comp_ptr);
1683 int rec_type = kPidSymbolRemove;
1684 comp_ptr = varint_encode(rec_type, comp_ptr);
1685 comp_ptr = varint_encode(vaddr, comp_ptr);
1686 trace_pid.compressed_ptr = comp_ptr;
1687 }
1688
trace_init_name(int tgid,int pid,const char * name)1689 void trace_init_name(int tgid, int pid, const char *name)
1690 {
1691 if (trace_pid.fstream == NULL)
1692 return;
1693 #if 0
1694 if (ftrace_debug)
1695 fprintf(ftrace_debug, "t%lld kthread %d %s\n", sim_time, pid, name);
1696 #endif
1697 int len = strlen(name);
1698 char *comp_ptr = trace_pid.compressed_ptr;
1699 char *max_end_ptr = comp_ptr + len + kMaxKthreadNameCompressed;
1700 if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1701 uint32_t size = comp_ptr - trace_pid.compressed;
1702 fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1703 comp_ptr = trace_pid.compressed;
1704 }
1705 uint64_t time_diff = sim_time - trace_pid.prev_time;
1706 trace_pid.prev_time = sim_time;
1707 comp_ptr = varint_encode(time_diff, comp_ptr);
1708 int rec_type = kPidKthreadName;
1709 comp_ptr = varint_encode(rec_type, comp_ptr);
1710 comp_ptr = varint_encode(tgid, comp_ptr);
1711 comp_ptr = varint_encode(pid, comp_ptr);
1712 comp_ptr = varint_encode(len, comp_ptr);
1713 strncpy(comp_ptr, name, len);
1714 trace_pid.compressed_ptr = comp_ptr + len;
1715 }
1716
// Intentionally empty: 'start', 'end', 'offset' and 'exe' are accepted but
// ignored, and no trace record is written.
void trace_init_exec(unsigned long start, unsigned long end,
                     unsigned long offset, const char *exe)
{
    (void) start;
    (void) end;
    (void) offset;
    (void) exe;
}
1721
1722 // This function is called by the generated code to record the basic
1723 // block number.
trace_bb_helper(uint64_t bb_num,TranslationBlock * tb)1724 void trace_bb_helper(uint64_t bb_num, TranslationBlock *tb)
1725 {
1726 BBRec *bb_rec = tb->bb_rec;
1727 uint64_t prev_time = tb->prev_time;
1728 trace_bb.current_bb_addr = tb->pc;
1729 trace_bb.current_bb_num = bb_num;
1730 trace_bb.current_bb_start_time = sim_time;
1731 trace_bb.num_insns = 0;
1732 trace_bb.recnum += 1;
1733
1734 #if 0
1735 if (ftrace_debug)
1736 fprintf(ftrace_debug, "t%lld %lld\n", sim_time, bb_num);
1737 #endif
1738 if (bb_rec && bb_rec->bb_num == bb_num && prev_time > trace_bb.flush_time) {
1739 uint64_t time_diff = sim_time - prev_time;
1740 if (bb_rec->repeat == 0) {
1741 bb_rec->repeat = 1;
1742 bb_rec->time_diff = time_diff;
1743 tb->prev_time = sim_time;
1744 return;
1745 } else if (time_diff == bb_rec->time_diff) {
1746 bb_rec->repeat += 1;
1747 tb->prev_time = sim_time;
1748 return;
1749 }
1750 }
1751
1752 BBRec *next = trace_bb.next;
1753 if (next == &trace_bb.buffer[kMaxNumBasicBlocks]) {
1754 BBRec *ptr;
1755 char *comp_ptr = trace_bb.compressed_ptr;
1756 int64_t prev_bb_num = trace_bb.prev_bb_num;
1757 uint64_t prev_bb_time = trace_bb.prev_bb_time;
1758 for (ptr = trace_bb.buffer; ptr != next; ++ptr) {
1759 if (comp_ptr >= trace_bb.high_water_ptr) {
1760 uint32_t size = comp_ptr - trace_bb.compressed;
1761 fwrite(trace_bb.compressed, sizeof(char), size, trace_bb.fstream);
1762 comp_ptr = trace_bb.compressed;
1763 }
1764 int64_t bb_diff = ptr->bb_num - prev_bb_num;
1765 prev_bb_num = ptr->bb_num;
1766 uint64_t time_diff = ptr->start_time - prev_bb_time;
1767 prev_bb_time = ptr->start_time;
1768 comp_ptr = varint_encode_signed(bb_diff, comp_ptr);
1769 comp_ptr = varint_encode(time_diff, comp_ptr);
1770 comp_ptr = varint_encode(ptr->repeat, comp_ptr);
1771 if (ptr->repeat)
1772 comp_ptr = varint_encode(ptr->time_diff, comp_ptr);
1773 }
1774 trace_bb.compressed_ptr = comp_ptr;
1775 trace_bb.prev_bb_num = prev_bb_num;
1776 trace_bb.prev_bb_time = prev_bb_time;
1777
1778 next = trace_bb.buffer;
1779 trace_bb.flush_time = sim_time;
1780 }
1781 tb->bb_rec = next;
1782 next->bb_num = bb_num;
1783 next->start_time = sim_time;
1784 next->time_diff = 0;
1785 next->repeat = 0;
1786 tb->prev_time = sim_time;
1787 next += 1;
1788 trace_bb.next = next;
1789 }
1790
1791 // This function is called by the generated code to record the simulation
1792 // time at the start of each instruction.
trace_insn_helper()1793 void trace_insn_helper()
1794 {
1795 InsnRec *current = trace_insn.current;
1796 uint64_t time_diff = sim_time - trace_insn.prev_time;
1797 trace_insn.prev_time = sim_time;
1798
1799 // Keep track of the number of traced instructions so far in this
1800 // basic block in case we get an exception in the middle of the bb.
1801 trace_bb.num_insns += 1;
1802
1803 #if 0
1804 if (ftrace_debug) {
1805 uint32_t current_pc = trace_bb.current_bb_addr + 4 * (trace_bb.num_insns - 1);
1806 fprintf(ftrace_debug, "%llu %x\n", sim_time, current_pc);
1807 }
1808 #endif
1809 if (time_diff == current->time_diff) {
1810 current->repeat += 1;
1811 if (current->repeat != 0)
1812 return;
1813
1814 // The repeat count wrapped around, so back up one and create
1815 // a new record.
1816 current->repeat -= 1;
1817 }
1818 current += 1;
1819
1820 if (current == &trace_insn.buffer[kInsnBufferSize]) {
1821 InsnRec *ptr;
1822 char *comp_ptr = trace_insn.compressed_ptr;
1823 for (ptr = trace_insn.buffer; ptr != current; ++ptr) {
1824 if (comp_ptr >= trace_insn.high_water_ptr) {
1825 uint32_t size = comp_ptr - trace_insn.compressed;
1826 uint32_t rval = fwrite(trace_insn.compressed, sizeof(char),
1827 size, trace_insn.fstream);
1828 if (rval != size) {
1829 fprintf(stderr, "fwrite() failed\n");
1830 perror(trace_insn.filename);
1831 exit(1);
1832 }
1833 comp_ptr = trace_insn.compressed;
1834 }
1835 comp_ptr = varint_encode(ptr->time_diff, comp_ptr);
1836 comp_ptr = varint_encode(ptr->repeat, comp_ptr);
1837 }
1838 trace_insn.compressed_ptr = comp_ptr;
1839 current = trace_insn.buffer;
1840 }
1841 current->time_diff = time_diff;
1842 current->repeat = 0;
1843 trace_insn.current = current;
1844 }
1845
1846 // Adds an interpreted method trace record. Each trace record is a time
1847 // stamped entry or exit to a method in a language executed by a "virtual
1848 // machine". This allows profiling tools to show the method names instead
1849 // of the core virtual machine interpreter.
trace_interpreted_method(uint32_t addr,int call_type)1850 void trace_interpreted_method(uint32_t addr, int call_type)
1851 {
1852 if (trace_method.fstream == NULL)
1853 return;
1854 #if 0
1855 fprintf(stderr, "trace_method time: %llu p%d 0x%x %d\n",
1856 sim_time, current_pid, addr, call_type);
1857 #endif
1858 char *comp_ptr = trace_method.compressed_ptr;
1859 char *max_end_ptr = comp_ptr + kMaxMethodCompressed;
1860 if (max_end_ptr >= &trace_method.compressed[kCompressedSize]) {
1861 uint32_t size = comp_ptr - trace_method.compressed;
1862 fwrite(trace_method.compressed, sizeof(char), size, trace_method.fstream);
1863 comp_ptr = trace_method.compressed;
1864 }
1865 uint64_t time_diff = sim_time - trace_method.prev_time;
1866 trace_method.prev_time = sim_time;
1867
1868 int32_t addr_diff = addr - trace_method.prev_addr;
1869 trace_method.prev_addr = addr;
1870
1871 int32_t pid_diff = current_pid - trace_method.prev_pid;
1872 trace_method.prev_pid = current_pid;
1873
1874 comp_ptr = varint_encode(time_diff, comp_ptr);
1875 comp_ptr = varint_encode_signed(addr_diff, comp_ptr);
1876 comp_ptr = varint_encode_signed(pid_diff, comp_ptr);
1877 comp_ptr = varint_encode(call_type, comp_ptr);
1878 trace_method.compressed_ptr = comp_ptr;
1879 }
1880