""" NNAPI systrace parser - aggregation of timing from multiple threads """

# TODO:
# - phase and layer totals
import math

from parser.naming import layers, phases, subphases
from parser.naming import (PHASE_OVERALL, PHASE_TERMINATION, PHASE_WARMUP,
                           PHASE_BENCHMARK, PHASE_EXECUTION, PHASE_INITIALIZATION,
                           PHASE_INPUTS_AND_OUTPUTS, PHASE_RESULTS)
from parser.naming import LAYER_APPLICATION, LAYER_IPC, LAYER_DRIVER
LAYER_TOTAL = "LT"  # Total across layers


def aggregate_times(tracker_map, special_case_lr_pe=True):
    """ Takes the trackers for each thread and produces timing statistics for
        all layers and phases.

        Arguments:
        - tracker_map: mapping of pid -> tracker. Each tracker must provide
          get_execution_count(app_phase), get_stat(tag, app_phase,
          special_case_lr_pe), get_ld_pe_begins(app_phase) and
          get_ld_pe_ends(app_phase).
        - special_case_lr_pe: passed through unchanged to tracker.get_stat().

        Returns (times, self_times, has_warmup and has_benchmark, execution_counts),
        where:
        - times and self_times are nested dictionaries of the form
          phase -> layer -> time with the following notes:
          - phase is flattened over all phases, except PHASE_WARMUP and
            PHASE_BENCHMARK, where the structure is phase -> phase -> layer -> time
          - PHASE_WARMUP and PHASE_BENCHMARK only nest execution and its
            subphases
          - PHASE_WARMUP and PHASE_BENCHMARK are not present if the trace does
            not contain them (both must be present for either to be reported)
          - the first level phase contains total over PHASE_WARMUP and
            PHASE_BENCHMARK if present
          - time may be math.nan if the data is not present in the trace
          - in addition to the layer from parser.naming, LAYER_TOTAL holds
            the total time spent in that layer over all phases
        - execution_counts contains a dictionary of the form
          {PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK} -> no of executions

        Raises AssertionError if the number of driver execution begins and
        ends collected from the trackers differ.
    """
    all_application_phases = [PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK]

    # Calculate execution counts: the largest count reported by any tracker
    execution_counts = {}
    for app_phase in all_application_phases:
        execution_count = 0
        for tracker in tracker_map.values():
            execution_count = max(execution_count,
                                  tracker.get_execution_count(app_phase))
        execution_counts[app_phase] = execution_count
    has_warmup = bool(execution_counts[PHASE_WARMUP])
    has_benchmark = bool(execution_counts[PHASE_BENCHMARK])
    # Per-application-phase results are only produced when the trace has both
    # warmup and benchmark executions. Every use below must test this same
    # condition, as the nested dicts only exist when it holds.
    has_app_phases = has_warmup and has_benchmark
    if not has_app_phases:
        all_application_phases = [PHASE_OVERALL]

    # Create dicts
    times = {}
    self_times = {}
    if has_app_phases:
        for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
            times[app_phase] = {}
            self_times[app_phase] = {}
            for phase in _phase_and_subphases(PHASE_EXECUTION):
                times[app_phase][phase] = {}
                self_times[app_phase][phase] = {}
    for phase in phases + [PHASE_OVERALL] + subphases[PHASE_EXECUTION]:
        times[phase] = {}
        self_times[phase] = {}

    # Gather total times from all threads, calculate layer and phase totals
    for layer in layers:
        for phase0 in [PHASE_OVERALL] + phases:
            for phase in _phase_and_subphases(phase0):
                t = 0.0
                tag = layer + "_" + phase
                for app_phase in all_application_phases:
                    if layer == LAYER_DRIVER and phase == PHASE_EXECUTION:
                        # Calculate driver execution times from begins and ends
                        t0 = _driver_execution_time(tracker_map, app_phase)
                    else:
                        t0 = 0.0
                        for tracker in tracker_map.values():
                            t0 += tracker.get_stat(tag, app_phase, special_case_lr_pe)
                    if phase0 == PHASE_EXECUTION and (app_phase != PHASE_OVERALL):
                        times[app_phase][phase][layer] = zero_to_nan_if_missing(t0, phase, layer)
                    t += t0
                times[phase][layer] = zero_to_nan_if_missing(t, phase, layer)
        # No overall time recorded for this layer (zero): fall back to the sum
        # over the top-level phases. NaN is truthy, so a missing (NaN) overall
        # time is left untouched.
        if not times[PHASE_OVERALL][layer]:
            times[PHASE_OVERALL][layer] = sum(nan_to_zero(times[phase][layer]) for phase in phases)
    for phase0 in [PHASE_OVERALL] + phases:
        for phase in _phase_and_subphases(phase0):
            times[phase][LAYER_TOTAL] = max_ignoring_nans(times[phase].values())
            if phase0 == PHASE_EXECUTION and has_app_phases:
                for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
                    times[app_phase][phase][LAYER_TOTAL] = max_ignoring_nans(
                        times[app_phase][phase].values())

    # Calculate self-times for each layer
    for phase0 in [PHASE_OVERALL] + phases:
        for phase in _phase_and_subphases(phase0):
            self_times[phase][LAYER_TOTAL] = times[phase][LAYER_TOTAL]
            if phase0 == PHASE_EXECUTION and has_app_phases:
                for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
                    self_times[app_phase][phase][LAYER_TOTAL] = times[app_phase][phase][LAYER_TOTAL]
            # Walk the layers in reverse order, subtracting the time of the
            # previously visited layer that had data; layers with missing
            # (NaN) or zero time do not update the running value t.
            t = 0.0
            for layer in reversed(layers):
                if math.isnan(times[phase][layer]):
                    self_times[phase][layer] = math.nan
                elif times[phase][layer] == 0.0:
                    self_times[phase][layer] = 0.0
                elif (phase == PHASE_OVERALL and
                      layer in (LAYER_DRIVER, LAYER_IPC) and
                      times[PHASE_EXECUTION][LAYER_DRIVER] == 0.0):
                    # Driver was only used for initialization phase, did not support
                    # execution of the model
                    if layer == LAYER_DRIVER:
                        self_times[phase][layer] = times[phase][layer]
                    else:
                        self_times[phase][layer] = times[phase][layer] - times[phase][LAYER_DRIVER]
                else:
                    self_times[phase][layer] = times[phase][layer] - t
                    t = times[phase][layer]
            # Was "has_benchmark or has_warmup", which raised KeyError when
            # only one of the two was present, as the nested dicts are only
            # created when both are.
            if phase0 == PHASE_EXECUTION and has_app_phases:
                for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
                    t = 0.0
                    for layer in reversed(layers):
                        if math.isnan(times[app_phase][phase][layer]):
                            self_times[app_phase][phase][layer] = math.nan
                        elif times[app_phase][phase][layer] == 0.0:
                            self_times[app_phase][phase][layer] = 0.0
                        else:
                            self_times[app_phase][phase][layer] = times[app_phase][phase][layer] - t
                            t = times[app_phase][phase][layer]

    return (times, self_times, has_app_phases, execution_counts)


def _driver_execution_time(tracker_map, app_phase):
    """ Sum driver execution time for app_phase over all trackers.

        Begin and end timestamps are collected from every tracker, sorted
        independently and paired by rank; the result is the sum of the
        pairwise (end - begin) differences. """
    begins = []
    ends = []
    for tracker in tracker_map.values():
        begins.extend(tracker.get_ld_pe_begins(app_phase))
        ends.extend(tracker.get_ld_pe_ends(app_phase))
    assert len(begins) == len(ends)
    begins.sort()
    ends.sort()
    total = 0.0
    for begin, end in zip(begins, ends):
        total += (end - begin)
    return total


def zero_to_nan_if_missing(f, phase, layer):
    """ Turn zero time to a NaN to indicate missing data, when we think that
        the data is really missing. Data should only be missing from the
        Application layer (applications may not have any tracing) and
        the subphases of Execution in the Driver layer (other phases are
        discernible from the automatic HIDL tracepoints).

        Returns f unchanged in all other cases. """
    if f != 0.0:
        return f
    if layer == LAYER_APPLICATION:
        return math.nan
    if layer == LAYER_DRIVER and phase in subphases[PHASE_EXECUTION]:
        return math.nan
    return f


def nan_to_zero(f):
    """ Return 0.0 if f is NaN, otherwise f unchanged. """
    if math.isnan(f):
        return 0.0
    return f


def _phase_and_subphases(phase):
    """ Return the list holding phase followed by its subphases.

        PHASE_OVERALL yields just itself; PHASE_WARMUP and PHASE_BENCHMARK
        yield an empty list; a phase with no entry in subphases yields just
        itself. """
    if phase == PHASE_OVERALL:
        return [phase]
    if phase == PHASE_WARMUP or phase == PHASE_BENCHMARK:
        return []
    return [phase] + subphases.get(phase, [])


def max_ignoring_nans(xs):
    """ Return the maximum of xs, with NaN values counted as 0.0.

        Raises ValueError if xs is empty. """
    return max(map(nan_to_zero, xs))