# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Analyzes the latency and numerics information of sample model inference."""

import itertools
import json
from typing import Any, Callable, Optional, Sequence, Tuple, Union

from absl import logging
import numpy as np

from tensorflow.python.compiler.tensorrt.model_tests import model_handler
import tensorflow.python.compiler.tensorrt.trt_convert as trt


# pylint: disable=bad-whitespace
class DataFrame:
  """Lightweight immutable DataFrame similar to a pandas DataFrame."""

  def __init__(self,
               column_names: Sequence[str],
               rows: Optional[Sequence[Sequence[Any]]] = None,
               columns: Optional[Sequence[Sequence[Any]]] = None):
    self._column_names = column_names
    if not rows and not columns:
      raise ValueError("Cannot initialize with empty data!")
    self._rows = rows
    self._columns = columns

  @property
  def n_rows(self) -> int:
    return len(self._rows) if self._rows else len(self._columns[0])

  @property
  def n_columns(self) -> int:
    return len(self._columns) if self._columns else len(self._rows[0])

  @property
  def column_names(self) -> Sequence[str]:
    return self._column_names

  @property
  def rows(self) -> Sequence[Sequence[Any]]:
    return self._rows if self._rows else [
        [c[i] for c in self._columns] for i in range(len(self._columns[0]))
    ]

  @property
  def columns(self) -> Sequence[Sequence[Any]]:
    return self._columns if self._columns else [
        [r[i] for r in self._rows] for i in range(len(self._rows[0]))
    ]

  def __add__(self, other: "DataFrame") -> "DataFrame":
    if (not set(self.column_names).intersection(other.column_names) and
        len(self.rows) == len(other.rows)):
      return DataFrame(
          column_names=list(
              itertools.chain(self.column_names, other.column_names)),
          columns=list(itertools.chain(self.columns, other.columns)))
    if self.column_names == other.column_names:
      return DataFrame(
          column_names=self.column_names,
          rows=list(itertools.chain(self.rows, other.rows)))
    raise ValueError("Cannot combine the two DataFrames!")

  def __iadd__(self, other: "DataFrame") -> "DataFrame":
    tmp = self + other
    self._column_names = tmp._column_names
    self._rows, self._columns = tmp._rows, tmp._columns
    return self

  def __call__(self, r: int, c: Optional[Union[int, str]] = None) -> Any:
    if c is None:
      return dict(zip(self.column_names, self.rows[r]))
    c = self._column_names.index(c) if isinstance(c, str) else c
    return self._rows[r][c] if self._rows else self._columns[c][r]

  def __str__(self) -> str:
    return ",".join(self.column_names) + "\n" + "\n".join(",".join(
        "N/A" if v is None else str(v) for v in row) for row in self.rows)

  def to_csv(self, path: str):
    with open(path, "w") as file:
      file.write(str(self))

  def to_json(self, path: str):
    with open(path, "w") as file:
      json.dump([dict(zip(self.column_names, r)) for r in self.rows], file)

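
# A minimal usage sketch for DataFrame; the column names and values below are
# hypothetical and only illustrate the row/column access API defined above.
def _example_dataframe_usage() -> None:
  info = DataFrame(
      column_names=["model", "trt_model"],
      rows=[["model_a", True], ["model_b", False]])
  latency = DataFrame(
      column_names=["time(ms)", "speedup"],
      rows=[[5.0, 4.0], [20.0, 1.0]])
  # Disjoint column names with equal row counts: `+` joins column-wise.
  df = info + latency
  assert df.n_rows == 2 and df.n_columns == 4
  assert df(0, "speedup") == 4.0  # Cell lookup by column name.
  assert df(1) == {
      "model": "model_b", "trt_model": False, "time(ms)": 20.0, "speedup": 1.0
  }  # A whole row as a dict.
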

def extract_test_info(
    test_results: model_handler.TestResultCollection) -> DataFrame:
  """Extracts the test information."""
  column_names = list(
      itertools.chain(model_handler.ModelConfig._fields,
                      ["enable_gpu", "trt_model"],
                      trt.TrtConversionParams._fields))
  rows = []
  for result in test_results.results:
    r = list(result.model_config) + [result.enable_gpu]
    if result.trt_convert_params is not None:
      r += [True] + list(result.trt_convert_params)
    else:
      r += [False] + [None for _ in trt.TrtConversionParams._fields]
    rows.append(r)
  return DataFrame(column_names=column_names, rows=rows)

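
# A minimal sketch of how the column layout above is assembled from namedtuple
# `_fields`; ExampleConfig and ExampleParams are hypothetical stand-ins for
# model_handler.ModelConfig and trt.TrtConversionParams.
def _example_column_layout() -> None:
  import collections
  example_config = collections.namedtuple(
      "ExampleConfig", ["saved_model_dir", "batch_size"])
  example_params = collections.namedtuple("ExampleParams", ["precision_mode"])
  column_names = list(
      itertools.chain(example_config._fields, ["enable_gpu", "trt_model"],
                      example_params._fields))
  assert column_names == [
      "saved_model_dir", "batch_size", "enable_gpu", "trt_model",
      "precision_mode"
  ]
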

def analyze_test_latency(test_results: model_handler.TestResultCollection,
                         use_cpu_baseline: bool) -> DataFrame:
  """Analyzes test latency."""
  base_result = (
      test_results.cpu_base_result
      if use_cpu_baseline else test_results.gpu_base_result)
  if base_result is None:
    raise ValueError(
        f"No {'CPU' if use_cpu_baseline else 'GPU'} baseline found!")
  base_mean_time = np.mean(base_result.model_latency).item()
  column_names = ["time(ms)", "speedup"]
  rows = []
  for result in test_results.results:
    mean_time = np.mean(result.model_latency).item()
    rows.append([mean_time * 1000.0, base_mean_time / mean_time])
  return DataFrame(column_names=column_names, rows=rows)

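
# A minimal sketch of the latency arithmetic above, using hypothetical latency
# samples in seconds instead of a real model_handler.TestResultCollection.
def _example_latency_math() -> None:
  base_latency = [0.020, 0.022, 0.018]  # Baseline samples.
  trt_latency = [0.005, 0.006, 0.004]  # TensorRT samples.
  base_mean_time = np.mean(base_latency).item()
  mean_time = np.mean(trt_latency).item()
  # [time(ms), speedup]: 5 ms per inference, 4x faster than the baseline.
  row = [mean_time * 1000.0, base_mean_time / mean_time]
  assert np.allclose(row, [5.0, 4.0])
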

def analyze_test_numerics(test_results: model_handler.TestResultCollection,
                          use_cpu_baseline: bool) -> DataFrame:
  """Analyzes test numerics."""
  preprocess_funcs = {
      "diff": lambda x, y: np.fabs(x - y),
      # Clamps the divisor away from zero to avoid exceptions/NaNs.
      "rel_diff": lambda x, y: np.fabs(x - y) / np.fmax(np.fabs(y), 1.0e-6)
  }
  postprocess_funcs = {"mean": np.mean, "std": np.std}
  column_names = []
  columns = []
  base_result = (
      test_results.cpu_base_result
      if use_cpu_baseline else test_results.gpu_base_result)
  if base_result is None:
    raise ValueError(
        f"No {'CPU' if use_cpu_baseline else 'GPU'} baseline found!")
  for fn0, fn1 in itertools.product(preprocess_funcs, postprocess_funcs):
    func0, func1 = preprocess_funcs[fn0], postprocess_funcs[fn1]
    column_names.append("{}_{}".format(fn0, fn1))
    columns.append([])
    for result in test_results.results:
      # Each cell maps an output tensor name to its metric value.
      columns[-1].append(dict())
      for idx, tensor in enumerate(result.output_tensors):
        name = base_result.output_names[idx]
        base_tensor = base_result.output_tensors[idx]
        metric_value = func1(func0(tensor, base_tensor)).item()
        columns[-1][-1][name] = metric_value
  return DataFrame(column_names=column_names, columns=columns)

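
# A minimal sketch of the "rel_diff"/"mean" metric above with hypothetical
# output tensors; a real run compares each entry of result.output_tensors
# against the corresponding baseline tensor.
def _example_numerics_math() -> None:
  baseline = np.array([1.0, 2.0, 4.0])
  candidate = np.array([1.1, 1.9, 4.0])
  rel_diff = np.fabs(candidate - baseline) / np.fmax(np.fabs(baseline), 1.0e-6)
  # Per-element relative errors are 0.1, 0.05 and 0.0, so the mean is 0.05.
  assert np.isclose(rel_diff.mean().item(), 0.05)
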

def check_column(df: DataFrame, name: str, fn: Callable[[float], bool]) -> bool:
  """Checks the values of a column using a custom function and logs failures.

  The check is only performed on TensorRT models, not native CPU/GPU models.

  Args:
    df: The DataFrame to be checked.
    name: The name of the column to be checked.
    fn: The function that takes a value from the specified column and returns
      whether the value satisfies the check.

  Returns:
    Whether all the values of the specified column satisfy the provided check.
  """
  is_ok = True
  for r in range(df.n_rows):
    if df(r, "trt_model"):
      if not fn(df(r, name)):
        logging.error("Unsatisfied %s found at: %s", name, df(r))
        is_ok = False
  return is_ok

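
# A minimal usage sketch for check_column; the DataFrame below is hypothetical
# and holds only the two columns this check needs.
def _example_check_column() -> None:
  df = DataFrame(
      column_names=["trt_model", "speedup"],
      rows=[[False, 1.0], [True, 3.5], [True, 0.8]])
  # Only rows with trt_model == True are checked; the 0.8x speedup row fails
  # the predicate, gets logged, and makes the overall check return False.
  assert not check_column(df, "speedup", lambda v: v >= 1.0)
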

class ResultAnalyzer:
  """Analyzes ModelHandlerManager results."""

  def __init__(
      self,
      use_cpu_latency_baseline: bool,
      use_cpu_numerics_baseline: bool,
      checkers: Sequence[Callable[[DataFrame], bool]],
  ):
    self._use_cpu_latency_baseline = use_cpu_latency_baseline
    self._use_cpu_numerics_baseline = use_cpu_numerics_baseline
    self._checkers = checkers

  def analysis(
      self, test_results: model_handler.TestResultCollection
  ) -> Tuple[DataFrame, Sequence[bool]]:
    df = extract_test_info(test_results)
    df += analyze_test_latency(test_results, self._use_cpu_latency_baseline)
    df += analyze_test_numerics(test_results, self._use_cpu_numerics_baseline)
    checks = [c(df) for c in self._checkers]
    return df, checks

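
# A minimal sketch of wiring checkers into ResultAnalyzer; the thresholds are
# hypothetical. The numerics columns hold dicts mapping output tensor names to
# metric values, so their checker receives a dict rather than a float.
def _example_result_analyzer() -> ResultAnalyzer:
  return ResultAnalyzer(
      use_cpu_latency_baseline=True,
      use_cpu_numerics_baseline=True,
      checkers=[
          lambda df: check_column(df, "speedup", lambda v: v >= 1.0),
          lambda df: check_column(
              df, "rel_diff_mean",
              lambda d: all(v < 0.05 for v in d.values())),
      ])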