syntax = "proto3";

package tensorflow;

import "tensorflow/core/framework/tensor.proto";
import "tensorflow/core/protobuf/graph_debug_info.proto";

option cc_enable_arenas = true;
option java_outer_classname = "DebugEventProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.util";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";

// Available modes for extracting debugging information from a Tensor.
// TODO(cais): Document the detailed column names and semantics in a separate
// markdown file once the implementation settles.
enum TensorDebugMode {
  UNSPECIFIED = 0;

  // Only records what tensors are computed, eagerly or in graphs.
  // No information regarding the value of the tensor is available.
  NO_TENSOR = 1;

  // A minimalist health summary for float-type tensors.
  // Contains information only about the presence/absence of pathological
  // values including Infinity and NaN.
  // Applicable only to float dtypes.
  CURT_HEALTH = 2;

  // A concise health summary for float-type tensors.
  // Contains more information than `CURT_HEALTH`.
  // Infinity and NaN are treated differently.
  // Applicable only to float and integer dtypes.
  // NOTE(review): the first line says "float-type tensors" while the line
  // above also lists integer dtypes -- confirm which is intended.
  CONCISE_HEALTH = 3;

  // A detailed health summary.
  // Contains further detailed information than `CONCISE_HEALTH`.
  // Information about device, dtype and shape are included.
  // Counts for various types of values (Infinity, NaN, negative, zero,
  // positive) are included.
  // Applicable to float, integer and boolean dtypes.
  FULL_HEALTH = 4;

  // Provides full runtime shape information, up to a maximum rank, beyond
  // which the dimension sizes are truncated.
  SHAPE = 5;

  // Full numeric summary.
  // Including device, dtype, shape, counts of various types of values
  // (Infinity, NaN, negative, zero, positive), and summary statistics
  // (minimum, maximum, mean and variance).
  // Applicable to float, integer and boolean dtypes.
  FULL_NUMERICS = 6;

  // Full tensor value.
  FULL_TENSOR = 7;

  // Reduce the elements of a tensor to a rank-1 tensor of shape [3], in which
  // - the 1st element is -inf if any element of the tensor is -inf,
  //   or zero otherwise.
  // - the 2nd element is +inf if any element of the tensor is +inf,
  //   or zero otherwise.
  // - the 3rd element is nan if any element of the tensor is nan, or zero
  //   otherwise.
  REDUCE_INF_NAN_THREE_SLOTS = 8;
}

// An Event related to the debugging of a TensorFlow program.
message DebugEvent {
  // Timestamp in seconds (with microsecond precision).
  double wall_time = 1;

  // Step of training (if available).
  int64 step = 2;

  // The payload of the event; at most one of the members below is set.
  // NOTE(review): field number 5 is not used anywhere in this message. If it
  // was assigned to a since-removed field, it should be declared `reserved`
  // -- confirm against the file history.
  oneof what {
    // Metadata related to this debugging data.
    DebugMetadata debug_metadata = 3;

    // The content of a source file.
    SourceFile source_file = 4;

    // A stack frame (filename, line number and column number, function name and
    // code string) with ID.
    StackFrameWithId stack_frame_with_id = 6;

    // The creation of an op within a graph (e.g., a FuncGraph compiled from
    // a Python function).
    GraphOpCreation graph_op_creation = 7;

    // Information about a debugged graph.
    DebuggedGraph debugged_graph = 8;

    // Execution of an op or a Graph (e.g., a tf.function).
    Execution execution = 9;

    // A graph execution trace: Contains information about the intermediate
    // tensors computed during the graph execution.
    GraphExecutionTrace graph_execution_trace = 10;

    // The ID of the graph (i.e., FuncGraph) executed here: applicable only
    // to the execution of a FuncGraph.
    string graph_id = 11;

    // A device on which debugger-instrumented ops and/or tensors reside.
    DebuggedDevice debugged_device = 12;
  }
}

// Metadata about the debugger and the debugged TensorFlow program.
message DebugMetadata {
  // Version of TensorFlow.
  string tensorflow_version = 1;

  // Version of the DebugEvent file format.
  // Has a format of "debug.Event:<number>", e.g., "debug.Event:1".
  string file_version = 2;

  // A unique ID for the current run of tfdbg.
  // A run of tfdbg is defined as a TensorFlow job instrumented by tfdbg.
  // Multiple hosts in a distributed TensorFlow job instrumented by tfdbg
  // have the same ID.
  string tfdbg_run_id = 3;
}

// Content of a source file involved in the execution of the debugged TensorFlow
// program.
message SourceFile {
  // Path to the file.
  string file_path = 1;

  // Name of the host on which the file is located.
  string host_name = 2;

  // Line-by-line content of the file.
  repeated string lines = 3;
}

// A stack frame with ID.
message StackFrameWithId {
  // A unique ID for the stack frame: A UUID-like string.
  string id = 1;

  // Stack frame, i.e., a frame of a stack trace, containing information
  // regarding the file name, line number, function name, code content
  // of the line, and column number (if available).
  GraphDebugInfo.FileLineCol file_line_col = 2;
}

// Code location information: A stack trace with host-name information.
// Instead of encoding the detailed stack trace, this proto refers to IDs of
// stack frames stored as `StackFrameWithId` protos.
message CodeLocation {
  // Host name on which the source files are located.
  string host_name = 1;

  // IDs of the stack frames, each of which is identified by a unique ID.
  // The ordering of the frames is consistent with Python's
  // `traceback.extract_tb()`.
  repeated string stack_frame_ids = 2;
}

// The creation of an op in a TensorFlow Graph (e.g., FuncGraph in TF2).
message GraphOpCreation {
  // Type of the op (e.g., "MatMul").
  string op_type = 1;

  // Name of the op (e.g., "Dense/MatMul_1").
  string op_name = 2;

  // Name of the graph that the op is a part of (if available).
  string graph_name = 3;

  // Unique ID of the graph (generated by debugger).
  // This is the ID of the immediately-enclosing graph.
  string graph_id = 4;

  // Name of the device that the op is assigned to (if available).
  string device_name = 5;

  // Names of the input tensors to the op.
  repeated string input_names = 6;

  // Number of output tensors emitted by the op.
  int32 num_outputs = 7;

  // Code location (stack trace) of the op's creation.
  CodeLocation code_location = 8;

  // Unique IDs for the output tensors of this op.
  // NOTE(review): `Execution` declares its tensor-ID fields as int64 while
  // this field is int32 -- confirm the narrower type is intended.
  repeated int32 output_tensor_ids = 9;
}

// A debugger-instrumented graph.
message DebuggedGraph {
  // An ID for the graph.
  // This can be used to look up graph names. Generated by the debugger.
  string graph_id = 1;

  // Name of the graph (if available).
  string graph_name = 2;

  // Names of the instrumented ops. This can be used to look up op name
  // based on the numeric-summary tensors (2nd column).
  repeated string instrumented_ops = 3;

  // Original (uninstrumented) GraphDef (if available).
  bytes original_graph_def = 4;

  // An encoded version of a GraphDef.
  // This graph may include the debugger-inserted ops.
  bytes instrumented_graph_def = 5;

  // ID of the immediately enclosing context (graph), if any.
  string outer_context_id = 6;
}

// A device on which ops and/or tensors are instrumented by the debugger.
message DebuggedDevice {
  // Name of the device.
  string device_name = 1;

  // A debugger-generated ID for the device. Guaranteed to be unique within
  // the scope of the debugged TensorFlow program, including single-host and
  // multi-host settings.
  // TODO(cais): Test the uniqueness guarantee in multi-host settings.
  int32 device_id = 2;
}

// Data relating to the eager execution of an op or a Graph.
// For an op that generates N output tensors (N >= 0), only one
// Execution proto will be used to describe the execution event.
message Execution {
  // Op type (e.g., "MatMul").
  // In the case of a Graph, this is the name of the Graph.
  string op_type = 1;

  // Number of output tensors.
  int32 num_outputs = 2;

  // The graph that's executed: applicable only to the eager
  // execution of a FuncGraph.
  string graph_id = 3;

  // IDs of the input tensors (if available).
  repeated int64 input_tensor_ids = 4;

  // IDs of the output tensors (if available).
  // If specified, must have the same length as tensor_protos.
  repeated int64 output_tensor_ids = 5;

  // Type of the tensor value encapsulated in this proto.
  TensorDebugMode tensor_debug_mode = 6;

  // Output Tensor values in the type described by `tensor_debug_mode`.
  // The length of this should match `num_outputs`.
  repeated TensorProto tensor_protos = 7;

  // Stack trace of the eager execution.
  CodeLocation code_location = 8;

  // Debugger-generated IDs of the devices on which the output tensors reside.
  // To look up details about the device (e.g., name), cross-reference this
  // field with the DebuggedDevice messages.
  repeated int32 output_tensor_device_ids = 9;

  // TODO(cais): When backporting to V1 Session.run() support, add more fields
  // such as fetches and feeds.
}

// Data relating to an execution of a Graph (e.g., an eager execution of a
// FuncGraph).
// The values of the intermediate tensors computed in the graph are recorded
// in this proto. A graph execution may correspond to one or more pieces of
// `GraphExecutionTrace`, depending on whether the instrumented tensor values
// are summarized in an aggregated or separate fashion.
message GraphExecutionTrace {
  // Unique ID of the context that the executed op(s) belong to (e.g., a
  // compiled concrete tf.function).
  string tfdbg_context_id = 1;

  // Name of the op (applicable only in the case of the `FULL_TENSOR` trace
  // level).
  string op_name = 2;

  // Output slot of the tensor (applicable only in the case of the `FULL_TENSOR`
  // trace level).
  int32 output_slot = 3;

  // Type of the tensor value encapsulated in this proto.
  TensorDebugMode tensor_debug_mode = 4;

  // Tensor value in the type described by `tensor_debug_mode`.
  // This tensor may summarize the value of a single intermediate op of the
  // graph, or those of multiple intermediate tensors.
  TensorProto tensor_proto = 5;

  // Name of the device that the op belongs to.
  string device_name = 6;
}