# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Microbenchmarks for Keras components in eager mode."""

import time

import tensorflow as tf

from tensorflow.python.eager import context
from tensorflow.python.eager.context import get_executor
from tensorflow.python.keras.utils import tf_inspect
from tensorflow.python.platform import benchmark  # pylint: disable=unused-import


def _run_benchmark(func, num_iters, execution_mode=None):
  with context.execution_mode(execution_mode):
    # Call func once to warm up.
    func()
    if execution_mode == context.ASYNC:
      get_executor().wait()

    start = time.time()
    for _ in range(num_iters):
      func()
    if execution_mode == context.ASYNC:
      get_executor().wait()
    end = time.time()

    return end - start


class MicroBenchmarksBase(tf.test.Benchmark):
  """Run and report benchmark results."""

  def run_report(self, run_benchmark, func, num_iters, execution_mode=None):
    """Run and report benchmark results."""
    total_time = run_benchmark(func, num_iters, execution_mode)
    mean_us = total_time * 1e6 / num_iters
    metrics = [{
        "name": "exp_per_sec",
        "value": float("{0:.3f}".format(num_iters / total_time))
    }, {
        "name": "us_per_exp",
        "value": float("{0:.3f}".format(total_time * 1e6 / num_iters))
    }]
    benchmark_name = self._get_benchmark_name()
    self.report_benchmark(
        iters=num_iters,
        wall_time=mean_us,
        metrics=metrics,
        name=benchmark_name)

  def _get_benchmark_name(self):
    """Mostly copied from benchmark.py _get_name()."""
    stack = tf_inspect.stack()
    name = None
    for frame in stack[::-1]:
      f_locals = frame[0].f_locals
      f_self = f_locals.get("self", None)
      if isinstance(f_self, tf.test.Benchmark):
        name = frame[3]  # Get the method name.
        # This is a hack to get around the fact that some methods might have a
        # disable_tfrt decorator around them. In that case a function called
        # 'decorated' wraps the real called function underneath and so we
        # peek one deeper into the stack to get the real name.
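        # For example, when a benchmark method is wrapped, walking the stack
        # from the outermost frame inward first hits the 'decorated' wrapper
        # frame (whose `self` is also this Benchmark) before reaching the
        # real method's frame, hence the `continue` below.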
if name == "decorated": continue else: break if name is None: raise ValueError("Unable to determine calling Benchmark function.") if context.is_tfrt_enabled(): name = name + "_tfrt" return name def _run(self, func, num_iters, execution_mode=None): self.run_report(_run_benchmark, func, num_iters, execution_mode) def benchmark_layers_call_overhead(self): class OnlyOverheadLayer(tf.keras.layers.Layer): def call(self, x): return x layer = OnlyOverheadLayer() x = tf.convert_to_tensor([[1.]]) def fn(): layer(x) # pylint: disable=not-callable self._run(fn, 10000) def benchmark_op_layer_call_overhead(self): model_input = tf.keras.Input(shape=(1,)) model_output = model_input x = tf.convert_to_tensor([[1.1]]) for _ in range(20): model_output = tf.multiply(model_output, x) model = tf.keras.Model(inputs=model_input, outputs=model_output) def fn(): model(x) # pylint: disable=not-callable fn() self._run(fn, 100) def benchmark_model_predict_tensorlike_overhead(self): class OnlyOverheadLayer(tf.keras.layers.Layer): def call(self, x): return x model = tf.keras.Sequential([OnlyOverheadLayer()]) x = tf.convert_to_tensor([[1.]]) def fn(): model.predict(x) self._run(fn, 20) def benchmark_layers_embeddings_embedding_overhead(self): layer = tf.keras.layers.Embedding(1, 1) x = tf.zeros((1, 1), dtype="int32") def fn(): layer(x) self._run(fn, 10000) class KerasLayerCallOverheadBenchmarks( # pylint: disable=undefined-variable MicroBenchmarksBase, metaclass=benchmark.ParameterizedBenchmark): # The set of layers for benchmarking. To add benchmarks for new layers, # please add the parameter configs to "_benchmark_paramters". # The parameter of each layer benchmark is a tuple contains: # 1) The benchmark name with convention "{module_name}_{layer_name}"; # 2) The layer instance; # 3) The shape of the input to the layer; # 4) The kwargs used in the benchmark. It can include the number of # iterations to run the benchmarks, and kwargs used in the layer call. # By default, # of iteratons is 10000. 
  _benchmark_parameters = [
      ("advanced_activations_leaky_relu", tf.keras.layers.LeakyReLU(),
       (1, 1)),
      ("advanced_activations_prelu", tf.keras.layers.PReLU(), (1, 1)),
      ("advanced_activations_elu", tf.keras.layers.ELU(), (1, 1)),
      ("advanced_activations_thresholded_relu",
       tf.keras.layers.ThresholdedReLU(), (1, 1)),
      ("advanced_activations_softmax", tf.keras.layers.Softmax(), (1, 1)),
      ("advanced_activations_relu", tf.keras.layers.ReLU(), (1, 1)),
      ("core_masking", tf.keras.layers.Masking(), (1, 1)),
      ("core_dropout", tf.keras.layers.Dropout(0.5), (1, 1), {
          "training": True
      }),
      ("core_flatten", tf.keras.layers.Flatten(), (1, 1, 1)),
      ("core_dense", tf.keras.layers.Dense(1), (1, 1)),
      ("convolutional_conv1d", tf.keras.layers.Conv1D(1, (1,)), (1, 1, 1)),
      ("convolutional_conv2d", tf.keras.layers.Conv2D(1, (1, 1)),
       (1, 1, 1, 1)),
      ("convolutional_conv3d", tf.keras.layers.Conv3D(1, (1, 1, 1)),
       (1, 1, 1, 1, 1)),
      ("batch_norm_fused_inf", tf.keras.layers.BatchNormalization(fused=True),
       (1, 1, 1, 1)),
      ("batch_norm_fused_train",
       tf.keras.layers.BatchNormalization(fused=True), (1, 1, 1, 1), {
           "training": True
       }),
      ("batch_norm_nonfused_inf",
       tf.keras.layers.BatchNormalization(fused=False), (1, 1, 1, 1)),
      ("batch_norm_nonfused_train",
       tf.keras.layers.BatchNormalization(fused=False), (1, 1, 1, 1), {
           "training": True
       }),
      ("normalization_layer_normalization",
       tf.keras.layers.LayerNormalization(), (1, 1), {
           "iters": 100,
           "training": True
       }),
  ]

  def benchmark_layer(self, layer, input_shape, kwargs=None):
    x = tf.ones(input_shape)

    def fn():
      layer(x, **(kwargs or {}))

    # Pop "iters" before fn runs so it is not forwarded to the layer call.
    default_iters = 10000
    iters = kwargs.pop("iters", default_iters) if kwargs else default_iters
    self._run(fn, iters)


if __name__ == "__main__":
  assert tf.executing_eagerly()
  tf.test.main()
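# A minimal sketch of how these benchmarks are typically invoked (flag
# handling lives in tensorflow/python/platform/benchmark.py; the file name
# below is illustrative):
#
#   python eager_microbenchmarks_test.py --benchmarks=.
#
# where the value of --benchmarks is a regex selecting which benchmark
# methods to run, e.g. --benchmarks=.*layers_call_overhead.*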