# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark for Transpose op."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import numpy as np

from tensorflow.python.client import session as session_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test


def build_graph(device, input_shape, perm, datatype, num_iters):
  """Builds a graph containing a sequence of transpose operations.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor.
    perm: A permutation of the input tensor's dimensions, given as a list of
      ints whose length equals the input tensor's rank.
    datatype: numpy data type of the input tensor.
    num_iters: Number of transpose iterations to run.

  Returns:
    An op that groups all the transpose ops, suitable for session.run().
  """
  with ops.device("/%s:0" % device):
    total_size = np.prod(input_shape)
    inp = np.arange(1, total_size + 1, dtype=datatype).reshape(input_shape)
    t = constant_op.constant(inp, shape=input_shape)

    outputs = []
    transpose_op = array_ops.transpose(t, perm)
    outputs.append(transpose_op)
    # Chain the remaining transposes behind control dependencies so they
    # execute sequentially rather than in parallel.
    for _ in range(1, num_iters):
      with ops.control_dependencies([transpose_op]):
        transpose_op = array_ops.transpose(t, perm)
        outputs.append(transpose_op)
    return control_flow_ops.group(*outputs)


class TransposeBenchmark(test.Benchmark):
  """Benchmark transpose!"""

  def _run_graph(self, device, input_shape, perm, num_iters, datatype):
    """Runs the graph and prints its execution time.

    Args:
      device: String, the device to run on.
      input_shape: Shape of the input tensor.
      perm: A permutation of the input tensor's dimensions, given as a list of
        ints whose length equals the input tensor's rank.
      num_iters: Number of iterations to run the benchmark.
      datatype: numpy data type of the input tensor.

    Returns:
      The average duration of a single transpose, in seconds.
    """
    graph = ops.Graph()
    with graph.as_default():
      outputs = build_graph(device, input_shape, perm, datatype, num_iters)
      with session_lib.Session(graph=graph) as session:
        variables.global_variables_initializer().run()
        # Warmup run to exclude one-time setup costs from the measurement.
        session.run(outputs)
        start_time = time.time()
        session.run(outputs)

        duration = (time.time() - start_time) / num_iters
        # Each transpose reads and writes the full tensor once, hence the
        # factor of 2 in the bytes-moved estimate.
        throughput = np.prod(
            np.array(input_shape)) * datatype().itemsize * 2 / duration / 1e9

        print("%s %s inputshape:%s perm:%s %d %.6fsec, %.4fGB/s." %
              (device, str(datatype), str(input_shape).replace(" ", ""),
               str(perm).replace(" ", ""), num_iters, duration, throughput))

    name_template = (
        "transpose_{device}_{dtype}_input_shape_{inputshape}_perm_{perm}")

    self.report_benchmark(
        name=name_template.format(
            device=device,
            dtype=str(datatype).replace(" ", ""),
            inputshape=str(input_shape).replace(" ", ""),
            perm=str(perm).replace(" ", "")).replace(" ", ""),
        iters=num_iters,
        wall_time=duration)

    return duration

  def benchmark_transpose(self):
    print("transpose benchmark:")

    datatypes = [np.complex128, np.float64, np.float32, np.float16, np.int8]

    small_shapes = [[2, 20, 20, 20, 16], [2, 16, 20, 20, 20]] * 2
    small_shapes += [[2, 100, 100, 16], [2, 16, 100, 100]] * 2
    small_shapes += [[2, 5000, 16], [2, 16, 5000]] * 2
    small_perms = [[0, 4, 1, 2, 3], [0, 2, 3, 4, 1]] + [[4, 1, 2, 3, 0]] * 2
    small_perms += [[0, 3, 1, 2], [0, 2, 3, 1]] + [[3, 1, 2, 0]] * 2
    small_perms += [[0, 2, 1]] * 2 + [[2, 1, 0]] * 2

    large_shapes = [[2, 40, 40, 40, 32], [2, 40, 40, 40, 64]] * 2
    large_shapes += [[2, 300, 300, 32], [2, 300, 300, 64]] * 2
    large_shapes += [[2, 100000, 32], [2, 100000, 64]] * 2
    large_perms = [[0, 4, 1, 2, 3], [0, 2, 3, 4, 1]] + [[4, 1, 2, 3, 0]] * 2
    large_perms += [[0, 3, 1, 2], [0, 2, 3, 1]] + [[3, 1, 2, 0]] * 2
    large_perms += [[0, 2, 1]] * 2 + [[2, 1, 0]] * 2

    num_iters = 40
    for datatype in datatypes:
      for ishape, perm in zip(small_shapes, small_perms):
        self._run_graph("gpu", ishape, perm, num_iters, datatype)

      # The large shapes are skipped for complex128 and float16 inputs.
      if datatype is not np.complex128 and datatype is not np.float16:
        for ishape, perm in zip(large_shapes, large_perms):
          self._run_graph("gpu", ishape, perm, num_iters, datatype)

    small_dim_large_shapes = [[2, 10000, 3], [2, 3, 10000], [2, 10000, 8],
                              [2, 8, 10000]]
    small_dim_small_shapes = [[2, 5000, 3], [2, 3, 5000], [2, 5000, 8],
                              [2, 8, 5000]]
    small_dim_perms = [[0, 2, 1]] * 4

    num_iters = 320
    small_dim_large_shape_datatypes = [np.float64, np.float32, np.int8]
    for datatype in small_dim_large_shape_datatypes:
      for ishape, perm in zip(small_dim_large_shapes, small_dim_perms):
        self._run_graph("gpu", ishape, perm, num_iters, datatype)

    small_dim_small_shape_datatypes = [np.complex128, np.float16]
    for datatype in small_dim_small_shape_datatypes:
      for ishape, perm in zip(small_dim_small_shapes, small_dim_perms):
        self._run_graph("gpu", ishape, perm, num_iters, datatype)


if __name__ == "__main__":
  test.main()
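
# A minimal invocation sketch, assuming the standard TensorFlow benchmark
# runner: test.Benchmark subclasses are selected via the --benchmarks regex
# flag, and the file name used here is hypothetical.
#
#   python transpose_benchmark.py --benchmarks=TransposeBenchmark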