• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Benchmark suite for KPL and feature column implementations."""
16import itertools
17import math
18import random
19import string
20import time
21
22import numpy as np
23
24from tensorflow.python import keras
25from tensorflow.python.compat import v2_compat
26from tensorflow.python.data.ops import dataset_ops as tf_data
27from tensorflow.python.ops.ragged import ragged_tensor
28from tensorflow.python.platform import test as tf_test
29
30# This is required as of 3/2021 because otherwise we drop into graph mode.
31v2_compat.enable_v2_behavior()
32
33
class LayerBenchmark(tf_test.Benchmark):
  """Benchmark base class comparing a Keras layer to a feature column."""

  def report(self, name, keras_time, fc_time, iters):
    """Report the Keras timing along with feature-column comparison stats.

    Args:
      name: Name under which the benchmark result is reported.
      keras_time: Average time of the Keras implementation; reported as the
        benchmark's wall time.
      fc_time: Average time of the feature column implementation.
      iters: Iteration count forwarded to `report_benchmark`.
    """
    # Positive when the feature column implementation is the slower one.
    time_delta = fc_time - keras_time

    extras = {}
    extras["fc_avg_time"] = fc_time
    extras["fc_vs_keras_sec"] = time_delta
    extras["fc_vs_keras_pct"] = (time_delta / fc_time) * 100
    extras["keras_faster_ratio"] = fc_time / keras_time

    self.report_benchmark(
        name=name, iters=iters, wall_time=keras_time, extras=extras)
47
48
class StepTimingCallback(keras.callbacks.Callback):
  """A callback that times non-warmup steps of a Keras predict call.

  The first `_WARMUP_BATCHES` batches of a predict call are excluded from the
  measurement. The clock starts when the first non-warmup batch begins and
  stops when the predict call ends, and the elapsed time is divided by the
  number of batches that started inside that window.

  After a predict call finishes:
    t0: `time.perf_counter()` reading when the first timed batch began.
    tn: `time.perf_counter()` reading when the predict call ended.
    steps: Number of timed (non-warmup) batches.
    t_avg: Average seconds per timed batch.
  """

  # Number of leading batches that are treated as warmup and not timed.
  _WARMUP_BATCHES = 2

  def __init__(self):
    super().__init__()
    self._reset()

  def _reset(self):
    """Clear all timing state so the callback can be used for a fresh run."""
    self.t0 = None
    self.tn = None
    self.t_avg = None
    self.steps = 0

  def on_predict_begin(self, logs=None):
    # Start every predict call from a clean slate so that a reused callback
    # does not carry step counts over from a previous call.
    self._reset()

  def on_predict_batch_begin(self, batch_index, _):
    if batch_index < self._WARMUP_BATCHES:
      return
    if batch_index == self._WARMUP_BATCHES:
      # perf_counter is monotonic, so the interval cannot be distorted by
      # system clock adjustments.
      self.t0 = time.perf_counter()
    # Every batch that begins at or after t0 contributes to the elapsed time,
    # including the batch that started the clock.
    self.steps += 1

  def on_predict_end(self, _):
    self.tn = time.perf_counter()
    if self.t0 is None or self.steps == 0:
      raise RuntimeError(
          "StepTimingCallback needs more than {} predict batches to produce "
          "a timing; no non-warmup batch was run.".format(
              self._WARMUP_BATCHES))
    self.t_avg = (self.tn - self.t0) / self.steps
65
66
def create_data(length, num_entries, max_value, dtype):
  """Build a ragged tensor of random numeric values.

  Args:
    length: Scale for the random row lengths; each row length is a uniform
      sample from [0, 1) multiplied by `length` and truncated to an int.
    num_entries: Number of rows in the ragged tensor.
    max_value: Scale for the random values; each value is a uniform sample
      from [0, 1) multiplied by `max_value` before casting.
    dtype: Numpy dtype the flat values are cast to.

  Returns:
    A `RaggedTensor` built from the flat random values and the row lengths.
  """
  row_lengths = (np.random.random(size=num_entries) * length).astype(int)
  flat_size = row_lengths.sum()
  scaled_values = np.random.random(size=flat_size) * max_value
  flat_values = scaled_values.astype(dtype)
  return ragged_tensor.RaggedTensor.from_row_lengths(flat_values, row_lengths)
73
74
def create_string_data(length,
                       num_entries,
                       vocabulary,
                       pct_oov,
                       oov_string="__OOV__"):
  """Create a ragged tensor of random vocabulary strings with OOV entries.

  Args:
    length: Scale for the random row lengths; each row length is a uniform
      sample from [0, 1) multiplied by `length` and truncated to an int.
    num_entries: Number of rows in the ragged tensor.
    vocabulary: Non-empty sequence of strings to sample values from.
    pct_oov: Fraction of the values to overwrite with `oov_string`, expected
      to be in [0, 1]. Values above 1 are treated as 1; values at or below 0
      produce no OOV entries.
    oov_string: The string written at the out-of-vocabulary positions.

  Returns:
    A `RaggedTensor` of strings built from the flat values and row lengths.
  """
  lengths = (np.random.random(size=num_entries) * length).astype(int)
  total_length = int(np.sum(lengths))
  values = [random.choice(vocabulary) for _ in range(total_length)]

  # Guard on the rounded-down count rather than on pct_oov: a small positive
  # pct_oov can still yield zero OOV entries, which previously caused a
  # division by zero when computing the cadence.
  num_oovs = min(int(pct_oov * total_length), total_length)
  if num_oovs > 0:
    # Spread the OOV entries evenly, starting at index 0. Because num_oovs is
    # at most total_length, the cadence is at least 1 and the last written
    # index, (num_oovs - 1) * oov_cadence, is always in range.
    oov_cadence = total_length // num_oovs
    values[0:num_oovs * oov_cadence:oov_cadence] = [oov_string] * num_oovs

  return ragged_tensor.RaggedTensor.from_row_lengths(values, lengths)
97
98
def create_vocabulary(vocab_size):
  """Create a vocabulary of `vocab_size` unique ASCII-letter strings.

  Strings are enumerated in order of increasing length: first every single
  letter of `string.ascii_letters`, then every two-letter combination, and so
  on, until `vocab_size` strings have been produced.

  Args:
    vocab_size: Integer number of vocabulary entries to generate.

  Returns:
    A list of `vocab_size` distinct strings; empty if `vocab_size` is 0.

  Raises:
    ValueError: If `vocab_size` is negative.
  """
  if vocab_size < 0:
    raise ValueError(
        "vocab_size must be non-negative, got {}.".format(vocab_size))

  # Lazily enumerate words of length 1, 2, 3, ... and take exactly as many as
  # requested. This avoids estimating the needed word length with a
  # floating-point logarithm, which yielded an empty vocabulary for
  # vocab_size == 1 and raised a math domain error for vocab_size == 0.
  words = itertools.chain.from_iterable(
      itertools.product(string.ascii_letters, repeat=word_length)
      for word_length in itertools.count(1))
  return ["".join(chars) for chars in itertools.islice(words, vocab_size)]
109
110
def run_keras(data, model, batch_size, num_runs, steps_per_repeat=100):
  """Time `model.predict` over several runs and average the step time.

  Args:
    data: Input passed to `Dataset.from_tensor_slices`.
    model: Object whose `predict` method is benchmarked.
    batch_size: Batch size applied to the dataset.
    num_runs: Number of `predict` calls to time.
    steps_per_repeat: Number of steps added to the predict step count on
      every run; run k (1-based) predicts k * `steps_per_repeat` steps.

  Returns:
    The mean of the per-run average step times reported by
    `StepTimingCallback`.
  """
  dataset = tf_data.Dataset.from_tensor_slices(data)
  dataset = dataset.repeat()
  dataset = dataset.prefetch(tf_data.AUTOTUNE)
  dataset = dataset.batch(batch_size)
  dataset = dataset.cache()

  per_run_step_times = []
  total_steps = 0
  for _ in range(num_runs):
    # The step count accumulates, so each run is longer than the last.
    total_steps += steps_per_repeat
    step_timer = StepTimingCallback()
    # Benchmarked code begins here.
    model.predict(dataset, steps=total_steps, callbacks=[step_timer])
    # Benchmarked code ends here.
    per_run_step_times.append(step_timer.t_avg)
  return np.mean(per_run_step_times)
126
127
def run_fc(data, fc_fn, batch_size, num_runs, steps_per_repeat=100):
  """Benchmark a Feature Column.

  Args:
    data: Input passed to `Dataset.from_tensor_slices`.
    fc_fn: Callable applied to each batch; this is the code being timed.
    batch_size: Batch size applied to the dataset.
    num_runs: Number of timed runs.
    steps_per_repeat: Number of `fc_fn` calls made in each timed run.

  Returns:
    The mean over all runs of the average time per `fc_fn` call, in seconds.
  """
  ds = tf_data.Dataset.from_tensor_slices(data).repeat().prefetch(
      tf_data.AUTOTUNE).batch(batch_size).cache()
  ds_iter = iter(ds)

  # Call fc_fn once outside the timed region so that one-time tracing cost is
  # not included in the measurement.
  fc_fn(next(ds_iter))

  run_durations = []
  for _ in range(num_runs):
    # perf_counter is monotonic and high resolution, so the measured interval
    # cannot be skewed by system clock adjustments.
    run_start = time.perf_counter()
    # Benchmarked code begins here.
    for _ in range(steps_per_repeat):
      fc_fn(next(ds_iter))
    # Benchmarked code ends here.
    run_durations.append(time.perf_counter() - run_start)

  avg_per_step_time = np.array(run_durations) / steps_per_repeat
  return np.mean(avg_per_step_time)
150