syntax = "proto3";

package tensorflow.profiler.op_profile;

// Profile is the top-level data that summarizes a program.
message Profile {
  // Retired field numbers and names. They stay reserved so that they can
  // never be reused with a different meaning.
  reserved 2;
  reserved "by_program_structure";
  reserved 3;
  reserved "per_program";

  // Root of a profile broken down by instruction category.
  Node by_category = 1;
  // Root of a profile broken down by program.
  Node by_program = 4;
}

// An entry in the profile tree. (An instruction, or set of instructions).
message Node {
  // Name of this node. Semantics depend on `contents`: see the comments on
  // InstructionCategory and XLAInstruction below.
  string name = 1;
  // Measurements for this node. May be omitted e.g. for fused instructions.
  Metrics metrics = 2;
  // Child nodes. Subject to pruning, so this list may hold fewer entries than
  // `num_children`.
  repeated Node children = 3;

  // Details about what this node represents.
  oneof contents {
    // Set when this node is a category of XLA instructions.
    InstructionCategory category = 4;
    // Set when this node is a single XLA instruction.
    XLAInstruction xla = 5;
  }

  // Total number of children before pruning.
  int32 num_children = 6;

  // A category of XLA instructions.
  // name is a descriptive string, like "data formatting".
  message InstructionCategory {
  }

  // A single XLA instruction.
  // name is the unique instruction id, like "%multiply.5".
  message XLAInstruction {
    // Opcode like %multiply
    string op = 1;
    // Full expression, of the form:
    //   %multiply = [shape]multiply(operand1, operand2)
    string expression = 2;
    // Typically the TensorFlow operation name.
    string provenance = 3;
    // NOTE(review): presumably the name of the instruction category this
    // instruction belongs to (cf. InstructionCategory) -- confirm with the
    // producer of this message.
    string category = 4;
    // Describes the physical memory layout of the instruction's primary input.
    // e.g. for a convolution, this analyzes the image and ignores the kernel.
    LayoutAnalysis layout = 5;

    // Physical layout description used by `layout` above.
    message LayoutAnalysis {
      // The physical data layout, from most-minor to most-major dimensions.
      repeated Dimension dimensions = 1;

      // One dimension of the physical data layout.
      message Dimension {
        // Size of the data in this dimension.
        int32 size = 1;
        // Data must be padded to a multiple of alignment.
        int32 alignment = 2;
        // What the dimension represents, e.g. "spatial".
        string semantics = 3;
      }
    }
  }
}

// Measurements of an operation (or aggregated set of operations).
// Metrics are always "total" rather than "self".
message Metrics {
  // Core-time taken by this operation, as a fraction of all operations.
  double time = 1;
  // Floating point computations performed by this operation, as a fraction of
  // peak core FLOPS * program time. This representation has useful properties:
  //  - it is proportional to the number of floating point operations performed
  //  - utilization is flops/time
  //  - wasted potential flops is proportional to time - flops
  //  - it does not reveal the peak core FLOPS of the hardware
  double flops = 2;

  // The memory bandwidth used to load operands, as a fraction of
  // theoretical memory bandwidth on the specific hardware.
  double memory_bandwidth = 3;

  // NOTE(review): field numbers 4-10 are not used in this message. If they
  // belonged to fields that were removed, they should be marked `reserved`
  // -- confirm against the file history.

  // Elapsed core-time in picoseconds.
  double raw_time = 11;
  // Total floating-point operations performed.
  double raw_flops = 12;
  // Total bytes accessed (including both reads and writes).
  double raw_bytes_accessed = 13;
}