1syntax = "proto3"; 2 3package tensorflow.tfprof; 4option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/profiler/tfprof_log_go_proto"; 5 6import "tensorflow/core/framework/attr_value.proto"; 7import "tensorflow/core/framework/step_stats.proto"; 8 9// It specifies the Python callstack that creates an op. 10message CodeDef { 11 repeated Trace traces = 1; 12 message Trace { 13 string file = 1 [deprecated = true]; // deprecated by file_id. 14 int64 file_id = 6; 15 16 int32 lineno = 2; 17 18 string function = 3 [deprecated = true]; // deprecated by function_id. 19 int64 function_id = 7; 20 21 string line = 4 [deprecated = true]; // deprecated line_id. 22 int64 line_id = 8; 23 24 int32 func_start_line = 5; 25 } 26} 27 28message OpLogEntry { 29 // op name. 30 string name = 1; 31 // float_ops is filled by tfprof Python API when called. It requires the 32 // op has RegisterStatistics defined. Currently, Conv2D, MatMul, etc, are 33 // implemented. 34 int64 float_ops = 2; 35 // User can define extra op type information for an op. This allows the user 36 // to select a group of ops precisely using op_type as a key. 37 repeated string types = 3; 38 // Used to support tfprof "code" view. 39 CodeDef code_def = 4; 40} 41 42message OpLogProto { 43 repeated OpLogEntry log_entries = 1; 44 45 // Maps from id of CodeDef file,function,line to its string 46 // In the future can also map other id of other fields to string. 47 map<int64, string> id_to_string = 2; 48} 49 50// A proto representation of the profiler's profile. 51// It allows serialization, shipping around and deserialization of the profiles. 52// 53// Please don't depend on the internals of the profile proto. 54message ProfileProto { 55 map<int64, ProfileNode> nodes = 1; 56 // Whether or not has code traces. 57 bool has_trace = 2; 58 // Whether or not the TF device tracer fails to return accelerator 59 // information (which could lead to 0 accelerator execution time). 60 bool miss_accelerator_stream = 5; 61 // Traced steps. 62 repeated int64 steps = 3; 63 64 // Maps from id of CodeDef file,function,line to its string 65 // In the future can also map other id of other fields to string. 66 map<int64, string> id_to_string = 4; 67} 68 69message ProfileNode { 70 // graph node name. 71 string name = 1; 72 // graph operation type. 73 string op = 9; 74 // A unique id for the node. 75 int64 id = 13; 76 77 map<int32, int64> inputs = 2; 78 map<int32, Tuple> input_shapes = 16; 79 map<int32, int64> outputs = 3; 80 map<int32, Tuple> output_shapes = 15; 81 // A map from source node id to its output index to current node. 82 map<int64, int32> src_output_index = 14; 83 84 repeated int64 shape = 4; 85 repeated string op_types = 5; 86 string canonical_device = 6; 87 string host_device = 7; 88 89 int64 float_ops = 8; 90 91 CodeDef trace = 10; 92 map<string, AttrValue> attrs = 11; 93 94 map<int64, ExecProfile> execs = 12; 95} 96 97message ExecProfile { 98 // Can be larger than 1 if run multiple times in loop. 99 int64 run_count = 1; 100 // The earliest/latest time including scheduling and execution. 101 int64 all_start_micros = 2; 102 int64 latest_end_micros = 3; 103 104 // device -> vector of {op_start_micros, op_exec_micros} pairs. 105 // accelerator_execs: gpu:id/stream:all -> {op_start_micros, op_exec_micros} 106 // For accelerator, vector size can be larger than 1, multiple kernel fires 107 // or in tf.while_loop. 
108 map<string, ExecTime> accelerator_execs = 4; 109 // cpu_execs: cpu/gpu:id -> {op_start_micros, op_exec_micros} 110 // For cpu, vector size can be larger than 1 if in tf.while_loop. 111 map<string, ExecTime> cpu_execs = 5; 112 113 // Each entry to memory information of a scheduling of the node. 114 // Normally, there will be multiple entries in while_loop. 115 repeated ExecMemory memory_execs = 7; 116 // The allocation and deallocation times and sizes throughout execution. 117 repeated AllocationRecord allocations = 11; 118 // The devices related to this execution. 119 repeated string devices = 6; 120} 121 122message ExecTime { 123 repeated Tuple times = 1; 124} 125 126message ExecMemory { 127 // This is the timestamp when the memory information was tracked. 128 int64 memory_micros = 1; 129 // NOTE: Please don't depend on the following 4 fields yet. Due to 130 // TensorFlow internal tracing issues, the numbers can be quite wrong. 131 // TODO(xpan): Fix the TensorFlow internal tracing. 132 int64 host_temp_bytes = 2; 133 int64 host_persistent_bytes = 3; 134 int64 accelerator_temp_bytes = 4; 135 int64 accelerator_persistent_bytes = 5; 136 137 // Total bytes requested by the op. 138 int64 requested_bytes = 6; 139 // Total bytes requested by the op and released before op end. 140 int64 peak_bytes = 7; 141 // Total bytes requested by the op and not released after op end. 142 int64 residual_bytes = 8; 143 // Total bytes output by the op (not necessarily requested by the op). 144 int64 output_bytes = 9; 145 // The total number of bytes currently allocated by the allocator if >0. 146 int64 allocator_bytes_in_use = 10; 147 // The memory of each output of the operation. 148 map<int32, Memory> output_memory = 11; 149} 150 151message Tuple { 152 repeated int64 int64_values = 1; 153} 154 155message Memory { 156 int64 bytes = 1; 157 uint64 ptr = 2; 158} 159