1""" NNAPI Systrace parser - tracking of call tree based on trace lines
2
3    See contract-between-code-and-parser.txt for the
4    specification (cases in the specification are referred to with SPEC).
5"""
6
7import re
8from parser.naming import (subphases, translate_hidl_mark_to_nn_and_tag,
9                           get_function_name_from_mark, make_tag)
10from parser.naming import LAYER_CPU, LAYER_DRIVER, LAYER_RUNTIME, LAYER_APPLICATION
11from parser.naming import MARKER_SWITCH, MARKER_SUBTRACT
12from parser.naming import PHASE_EXECUTION, PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK
13from parser.tree import SingleThreadCallTree
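
# The classes below are driven by the systrace parser one trace line at a time.
# A minimal usage sketch (hypothetical variable names; the real driver code
# lives elsewhere in this tool):
#
#   app_phase = AppPhase()
#   tracker = Tracker(tgid, is_driver=False, app_phase=app_phase)
#   for time, mark in trace_lines:   # e.g. (12.345, "B|1000|[NN_LA_PP]funcE1") or (12.346, "E")
#     tracker.handle_mark(time, mark)
#   assert tracker.is_complete()
#   tracker.print_stats()            # statistics are reported in milliseconds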

class AppPhase(object):
  """ Tracks the overall phase of the program. Used to split up warmup and benchmark.

      Needs to be separate from the call trees so that the phase can be propagated
      to the driver.
  """
  def __init__(self):
    self.reset()

  def current(self):
    if self.stack:
      return self.stack[-1]
    else:
      return PHASE_OVERALL

  def push(self, phase):
    self.stack.append(phase)

  def pop(self):
    self.stack.pop()

  def reset(self):
    self.stack = []
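
  # Illustration: after push(PHASE_WARMUP), current() returns PHASE_WARMUP until
  # the matching pop(); with an empty stack, current() falls back to PHASE_OVERALL.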

class Tracker(object):
  """ Tracks the call stack of a single thread, feeding it into a SingleThreadCallTree
      and keeping track of entry and exit times for functions.

      Exposes statistics for a single thread, transforming the call tree as needed.
      All statistics are in milliseconds.

      Layer Runtime, Phase Execution (LR_PE) is special-cased; see the comment in get_stat().

      Subphases of Execution are aggregated towards the overall Execution phase as needed.
  """
  def __init__(self, tgid, is_driver, app_phase):
    self.tgid = tgid
    self.is_driver = is_driver
    self.app_phase = app_phase

    # Match the trace string
    # "[NN_LA_PP]funcE1" in "B|<thread1>|[NN_LA_PP]funcE1"
    # "[NN_LC_PCO]funcC1" in "B|<thread1>|[SW][NN_LC_PCO]funcC1"
    self.matcher = re.compile(r"B\|\d+\|.*\[([^]]+)\]\[?([^]])\]?")
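    # For example, for the line "B|1000|[NN_LA_PP]funcE1" (made-up thread id),
    # group(1) of the match is the tag "NN_LA_PP"; only group(1) is used below.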

    self.reset()

  def reset(self):
    self.stats = {}
    self.items = {}
    self.mytree = SingleThreadCallTree()
    self.begins_and_ends_ms = {}
    self.la_pe_counts = {}
    self.debugstring = "\n"

  def handle_mark(self, time, mark):
    """ Handle a single trace item (scoped entry or exit).
        Translates:
          - Automatically generated HIDL traces into NNTRACE layers and phases
          - SPEC:Switch phase during function into dummy items
          - SPEC:Subtracting time when nesting is violated into "subtract"
            markers
          - CPU/Driver layer distinction based on whether the process is the
            driver or an application
        This function is called multiple times for a single application run;
        afterwards the statistics can be calculated.
    """
    if mark[0] == "B":
      switch = False
      subtract = False
      # Workarounds for wrong tracepoints in early versions
      # TODO(mikie): remove later
      if ("ANeuralNetworksEvent_free" in mark) or ("ANeuralNetworksExecution_free" in mark):
        mark = mark.replace("_PT", "_PE")
      elif ("[SW][NN_LA_PR]executeWithCompilation" in mark):
        mark = mark.replace("[SW]", "")
      if MARKER_SWITCH in mark:
        switch = True
      if MARKER_SUBTRACT in mark:
        subtract = True
      if switch:
        # End previous item
        self.handle_mark(time, "E")
        # Push a dummy item that will get popped by the 'real' end of the
        # previous item.
        self.mytree.push_dummy(time)
      m = self.matcher.search(mark)
      if m is None:
        tag = translate_hidl_mark_to_nn_and_tag(mark)
        if tag is None:
          raise Exception("Couldn't parse mark " + mark)
      else:
        tag = m.group(1)
      [_, layer, phase] = tag.split("_")
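      # e.g. a tag like "NN_LA_PP" splits into layer "LA" and phase "PP"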
      if layer == LAYER_APPLICATION and phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
        self.app_phase.push(phase)
      if not self.is_driver:
        layer = layer.replace(LAYER_DRIVER, LAYER_CPU)
      else:
        layer = layer.replace(LAYER_CPU, LAYER_DRIVER)
      if layer == LAYER_APPLICATION and phase == PHASE_EXECUTION:
        self.la_pe_counts[self.app_phase.current()] = (
            self.la_pe_counts.get(self.app_phase.current(), 0) + 1)
      self.mytree.push(time, mark, layer, phase, self.app_phase.current(), subtract)
    elif mark[0] == "E":
      node = self.mytree.pop(time)
      if node.is_dummy():  # Dummy item
        pass
      else:
        if node.layer == LAYER_APPLICATION and node.phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
          self.app_phase.pop()
        function = node.app_phase + "::" + get_function_name_from_mark(node.mark)
        self.begins_and_ends_ms[function] = (self.begins_and_ends_ms.get(function, []) +
                                             [[float(node.start_time_s) * 1000.0,
                                               float(node.end_time_s) * 1000.0]])
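        # e.g. self.begins_and_ends_ms["<app phase>::ANeuralNetworksEvent_wait"]
        # accumulates [begin_ms, end_ms] pairs for that function in that app phase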

  def is_complete(self):
    """ Checks whether we have seen an end tracepoint for every begin tracepoint.
    """
    return self.mytree.current.is_root()

  def calculate_stats(self):
    assert self.is_complete()
    self.mytree.remove_ignored()
    self.mytree.remove_dummies()
    self.mytree.copy_subtracted_init_and_wrong_la()
    self.mytree.add_missing_la_nodes()
    # self.mytree.print()
    self.mytree.validate_nesting()
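    # At this point the tree should contain only real, correctly nested nodes;
    # the recursion below aggregates elapsed times into self.stats and self.items.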

    def recurse(node, prev_layer, prev_phase, indent, in_pe_layers):
      [begun, mark, layer, phase] = [
          node.start_time_s, node.mark, node.layer, node.phase()]
      time = node.end_time_s
      tag = None
      elapsed0 = "DETAIL"
      elapsed1 = node.elapsed_less_subtracted_ms()
      if elapsed1 is None:
        raise Exception("Elapsed for {} returned None".format(node.to_str()))

      if not node.is_added_detail() and not node.subtract:
        tag = node.app_phase + "_" + layer + "_" + phase
        elapsed0 = elapsed1
        self.stats[tag] = self.stats.get(tag, 0.0) + elapsed0
        self.items[tag] = self.items.get(tag, []) + [
            mark + " " + str(elapsed0) + " " + str(elapsed1) + " " + tag]
        if phase in subphases[PHASE_EXECUTION]:
          if not in_pe_layers.get(layer):
            pe_tag = node.app_phase + "_" + make_tag(layer, PHASE_EXECUTION)
            self.stats[pe_tag] = self.stats.get(pe_tag, 0.0) + elapsed0
            self.items[pe_tag] = self.items.get(pe_tag, []) + [
                mark + " " + str(elapsed0) + " " + str(elapsed1) + " " + pe_tag]
      if phase == PHASE_EXECUTION:
        in_pe_layers[layer] = in_pe_layers.get(layer, 0) + 1
      for c in node.children:
        recurse(c, layer or prev_layer, phase or prev_phase,
                indent + "  ", in_pe_layers)
      if phase == PHASE_EXECUTION:
        in_pe_layers[layer] = in_pe_layers[layer] - 1
      return

    for top in self.mytree.root.children:
      recurse(top, None, None, "", {})
    self.debugstring = self.mytree.to_str()
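    # Illustration: self.stats now maps "<app phase>_<layer>_<phase>" keys
    # (e.g. "..._LR_PE") to milliseconds, and self.items maps the same keys to
    # the trace marks that contributed to them.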

  # We need to special case the driver execution time because:
  # - The existing drivers don't have tracing, so we rely on HIDL traces
  # - Best we can do is to take the start of the HIDL server side call as
  #   the starting point (which includes a bit of overhead, but not much) and
  #   the start of the callback as the end point (which should be pretty
  #   accurate)
  # Note that the begin and end may be on different threads, hence the
  # calculation needs to happen in aggregation rather than here.
  def get_ld_pe_begins(self, app_phase):
    return self.get_begins(app_phase, "HIDL::IPreparedModel::execute::server")

  def get_ld_pe_ends(self, app_phase):
    return self.get_begins(app_phase, "HIDL::IExecutionCallback::notify::client")

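  # Illustration (hypothetical call): get_stat(make_tag(LAYER_RUNTIME, PHASE_EXECUTION), app_phase)
  # returns the runtime execution time in milliseconds for that app phase, using
  # the special case described below; other tags are looked up directly in self.stats.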
  def get_stat(self, tag, app_phase, special_case_pe=True):
    if not self.stats and not self.mytree.is_empty():
      self.calculate_stats()
    if tag == make_tag(LAYER_RUNTIME, PHASE_EXECUTION) and special_case_pe:
      # Execution is exposed as an asynchronous event from the runtime, so we
      # calculate the runtime time as starting from when the async operation is
      # kicked off until wait finishes, plus the synchronous setup and teardown
      # calls. This has two limitations:
      #   - multithreaded usage will not work correctly
      #   - should the application spend so much time before calling wait that
      #     execution has already finished, the time would get allocated to the
      #     runtime incorrectly
      async_starts = self.get_begins(app_phase, "ANeuralNetworksExecution_startCompute")
      async_ends = self.get_ends(app_phase, "ANeuralNetworksEvent_wait")
      elapsed = 0.0
      for i in range(0, len(async_starts)):
        elapsed = elapsed + (async_ends[i] - async_starts[i])
      for sync in ["ANeuralNetworksExecution_create", "ANeuralNetworksExecution_free",
                   "ANeuralNetworksEvent_create", "ANeuralNetworksEvent_free",
                   "ANeuralNetworksExecution_setInput", "ANeuralNetworksExecution_setOutput",
                   "ANeuralNetworksExecution_setInputFromMemory",
                   "ANeuralNetworksExecution_setOutputFromMemory"]:
        sync_starts = self.get_begins(app_phase, sync)
        sync_ends = self.get_ends(app_phase, sync)
        for i in range(0, len(sync_starts)):
          elapsed = elapsed + (sync_ends[i] - sync_starts[i])
      return elapsed
    return self.stats.get(app_phase + "_" + tag, 0.0)

  def get_execution_count(self, app_phase):
    # ANeuralNetworksExecution_create is reliable and comes from the runtime,
    # but it is not available pre-P.
    count = len(self.get_begins(app_phase, "ANeuralNetworksExecution_create"))
    if count > 0:
      return count
    # The application may have added its own tracepoints.
    return self.la_pe_counts.get(app_phase, 0)

  def get_begins(self, app_phase, function):
    name = app_phase + "::" + function
    return [begin_and_end[0] for begin_and_end in self.begins_and_ends_ms.get(name, [])]

  def get_ends(self, app_phase, function):
    name = app_phase + "::" + function
    return [begin_and_end[1] for begin_and_end in self.begins_and_ends_ms.get(name, [])]

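  # Example of the approximate output format of print_stats(), with made-up values:
  #   12345 App
  #   <app phase>_LR_PE 12.3
  #      <mark> 12.3 12.3 <app phase>_LR_PE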
  def print_stats(self):
    if not self.stats:
      self.calculate_stats()
    print(self.tgid, "Driver" if self.is_driver else "App")
    for tag in self.stats:
      print(tag, self.stats[tag])
      if self.items.get(tag):
        for item in self.items[tag]:
          print("  ", item)
      else:
        print("  ", "calculated only")

  def print(self):
    self.mytree.print()
252