# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark suite for KPL and feature column implementations."""
import itertools
import math
import random
import string
import time

import numpy as np

from tensorflow.python import keras
from tensorflow.python.compat import v2_compat
from tensorflow.python.data.ops import dataset_ops as tf_data
from tensorflow.python.ops.ragged import ragged_tensor
from tensorflow.python.platform import test as tf_test

# This is required as of 3/2021 because otherwise we drop into graph mode.
v2_compat.enable_v2_behavior()


class LayerBenchmark(tf_test.Benchmark):
  """Benchmark the layer forward pass."""

  def report(self, name, keras_time, fc_time, iters):
    """Calculate and report benchmark statistics.

    The Keras time is reported as the benchmark's wall time; the feature
    column time and the derived comparisons are attached as extras.

    Args:
      name: Name under which the benchmark is reported.
      keras_time: Average per-step time measured for the Keras model.
      fc_time: Average per-step time measured for the feature column.
      iters: Iteration count forwarded to `report_benchmark`.
    """
    extras = {
        "fc_avg_time": fc_time,
        "fc_vs_keras_sec": fc_time - keras_time,
        "fc_vs_keras_pct": ((fc_time - keras_time) / fc_time) * 100,
        "keras_faster_ratio": fc_time / keras_time
    }
    self.report_benchmark(
        iters=iters, wall_time=keras_time, extras=extras, name=name)


class StepTimingCallback(keras.callbacks.Callback):
  """A callback that times non-warmup steps of a Keras predict call.

  The first `_WARMUP_BATCHES` batches are excluded. The clock starts when the
  first non-warmup batch begins and stops in `on_predict_end`; `t_avg` is the
  elapsed time divided by the number of batches that started in that window.
  """

  # Number of leading batches that are not timed.
  _WARMUP_BATCHES = 2

  def __init__(self):
    # Initialise the Keras base class so its own attributes exist.
    super().__init__()
    self.t0 = None  # Start timestamp of the first timed batch.
    self.tn = None  # Timestamp taken when predict finishes.
    self.t_avg = None  # Average time per timed batch, set at predict end.
    self.steps = 0  # Number of timed batches.

  def on_predict_batch_begin(self, batch_index, _):
    if batch_index < self._WARMUP_BATCHES:
      return
    if batch_index == self._WARMUP_BATCHES:
      self.t0 = time.time()
    # The batch that starts the clock lies inside the timed window, so it is
    # counted too; otherwise the average divides by one step too few.
    self.steps += 1

  def on_predict_end(self, _):
    self.tn = time.time()
    if self.t0 is None:
      raise RuntimeError(
          "StepTimingCallback needs more than %d predict batches to time any "
          "non-warmup step." % self._WARMUP_BATCHES)
    self.t_avg = (self.tn - self.t0) / self.steps


def create_data(length, num_entries, max_value, dtype):
  """Create a ragged tensor with random data entries.

  Args:
    length: Scale for the row lengths; each row length is a uniform [0, 1)
      sample multiplied by `length` and truncated to an int.
    num_entries: Number of rows in the ragged tensor.
    max_value: Scale for the values; each value is a uniform [0, 1) sample
      multiplied by `max_value` before being cast.
    dtype: NumPy dtype the values are cast to.

  Returns:
    A `RaggedTensor` with `num_entries` rows of random values.
  """
  row_lengths = (np.random.random(size=num_entries) * length).astype(int)
  num_values = np.sum(row_lengths)
  flat_values = (np.random.random(size=num_values) * max_value).astype(dtype)
  return ragged_tensor.RaggedTensor.from_row_lengths(flat_values, row_lengths)


def create_string_data(length,
                       num_entries,
                       vocabulary,
                       pct_oov,
                       oov_string="__OOV__"):
  """Create a ragged string tensor with random vocabulary entries.

  Args:
    length: Scale for the row lengths; each row length is a uniform [0, 1)
      sample multiplied by `length` and truncated to an int.
    num_entries: Number of rows in the ragged tensor.
    vocabulary: Sequence of strings that values are drawn from.
    pct_oov: Fraction of values to overwrite with `oov_string`. It is
      multiplied directly by the value count, so 0.1 means 10%.
    oov_string: Token written at the out-of-vocabulary positions.

  Returns:
    A `RaggedTensor` of strings in which roughly `pct_oov` of the values,
    spaced evenly from index 0, are `oov_string`.
  """
  lengths = (np.random.random(size=num_entries) * length).astype(int)
  total_length = int(np.sum(lengths))
  num_oovs = int(pct_oov * total_length)
  values = [random.choice(vocabulary) for _ in range(total_length)]

  # Guard on the rounded count rather than on pct_oov: a small positive
  # fraction can round down to zero OOVs, which previously divided by zero.
  if num_oovs > 0:
    # A cadence of at least 1 keeps the positions advancing when pct_oov > 1.
    oov_cadence = max(1, total_length // num_oovs)
    for idx in range(0, total_length, oov_cadence)[:num_oovs]:
      values[idx] = oov_string

  return ragged_tensor.RaggedTensor.from_row_lengths(values, lengths)


def create_vocabulary(vocab_size):
  """Create a vocabulary of distinct ASCII-letter strings.

  Words are enumerated shortest first: every one-letter string in
  `string.ascii_letters` order, then every two-letter string, and so on.

  Args:
    vocab_size: Number of words to generate. Non-integer sizes are rounded
      up, matching the original `len(vocab) >= vocab_size` stop condition.

  Returns:
    A list of `vocab_size` distinct strings; empty when `vocab_size` is 0.

  Raises:
    ValueError: If `vocab_size` is negative.
  """
  words = ("".join(letters)
           for word_length in itertools.count(1)
           for letters in itertools.product(
               string.ascii_letters, repeat=word_length))
  # Taking exactly the requested count avoids estimating the word length
  # with a float logarithm, which yielded no words for vocab_size == 1.
  return list(itertools.islice(words, math.ceil(vocab_size)))


def run_keras(data, model, batch_size, num_runs, steps_per_repeat=100):
  """Benchmark a Keras model.

  Args:
    data: Tensor data sliced into the input dataset.
    model: Keras model whose `predict` call is timed.
    batch_size: Batch size applied to the dataset.
    num_runs: Number of timed `predict` calls.
    steps_per_repeat: Number of steps added to the step count on each run.

  Returns:
    The mean, over all runs, of the per-step time measured by
    `StepTimingCallback`.
  """
  ds = tf_data.Dataset.from_tensor_slices(data).repeat().prefetch(
      tf_data.AUTOTUNE).batch(batch_size).cache()
  steps = 0
  times = []
  for _ in range(num_runs):
    # NOTE(review): `steps` accumulates, so run k predicts for
    # k * steps_per_repeat steps. Preserved as-is; confirm this is intended.
    steps += steps_per_repeat
    timer = StepTimingCallback()
    # Benchmarked code begins here.
    model.predict(ds, steps=steps, callbacks=[timer])
    # Benchmarked code ends here.
    times.append(timer.t_avg)
  return np.mean(times)


def run_fc(data, fc_fn, batch_size, num_runs, steps_per_repeat=100):
  """Benchmark a Feature Column.

  Args:
    data: Tensor data sliced into the input dataset.
    fc_fn: Callable applied to each batch; this is the code being timed.
    batch_size: Batch size applied to the dataset.
    num_runs: Number of timed repeats.
    steps_per_repeat: Number of `fc_fn` calls per timed repeat.

  Returns:
    The mean, over all runs, of the average time per `fc_fn` call.
  """
  ds = tf_data.Dataset.from_tensor_slices(data).repeat().prefetch(
      tf_data.AUTOTUNE).batch(batch_size).cache()

  # Trace the fc_fn once so the first call is not part of the measurement.
  ds_iter = iter(ds)
  fc_fn(next(ds_iter))

  fc_starts = []
  fc_ends = []
  for _ in range(num_runs):
    fc_starts.append(time.time())
    # Benchmarked code begins here.
    for _ in range(steps_per_repeat):
      _ = fc_fn(next(ds_iter))
    # Benchmarked code ends here.
    fc_ends.append(time.time())

  run_durations = np.array(fc_ends) - np.array(fc_starts)
  return np.mean(run_durations / steps_per_repeat)