syntax = "proto3";

package tensorflow.profiler;

// What the dimension represents, e.g. spatial, feature or batch.
enum LayoutDimensionSemantics {
  // Zero value; used when the semantics of the dimension are not set.
  UNKNOWN_SEMANTICS = 0;
  FEATURE = 1;
  BATCH = 2;
  SPATIAL = 3;
}

// Data layout of an op.
message LayoutAnalysis {
  // Physical data layout in each tensor dimension.
  message Dimension {
    // Size of the data in this dimension.
    int32 size = 1;
    // Data must be padded to a multiple of alignment.
    int32 alignment = 2;
    // What the dimension represents.
    LayoutDimensionSemantics semantics = 3;
  }
  // The physical data layout, from most-minor to most-major dimensions.
  repeated Dimension dimensions = 1;
}

// Metrics for an operation (accumulated over all occurrences).
// Next ID: 21
message OpMetrics {
  // HLO module id. 0 for TF ops.
  uint64 hlo_module_id = 13;
  // Name of this op.
  string name = 6;
  // Long name of this op (e.g., HLO expression).
  string long_name = 20;
  // Category of this op.
  string category = 11;
  // Provenance of this op (e.g., if HLO op, original TF op).
  string provenance = 12;
  // Whether it is executed eagerly.
  bool is_eager = 18;
  // Number of executions.
  uint32 occurrences = 3;
  // Total time (self + children) in picoseconds.
  uint64 time_ps = 7;
  // Minimum time (self + children) among all occurrences, in picoseconds.
  uint64 min_time_ps = 17;
  // Total self time in picoseconds.
  uint64 self_time_ps = 1;
  // Total FLOPs.
  uint64 flops = 2;
  // Total bytes accessed.
  uint64 bytes_accessed = 5;
  // Breakdown of memory accessed by operation type and memory space.
  message MemoryAccessed {
    // Kind of memory operation an entry accounts for.
    enum OperationType {
      // Zero value; used when the operation type is not set.
      UNKNOWN = 0;
      READ = 1;
      WRITE = 2;
    }
    // Operation type this entry accounts for.
    OperationType operation_type = 1;
    // Device-specific id of memory space.
    uint64 memory_space = 2;
    // Bytes accessed for this operation type in this memory space.
    uint64 bytes_accessed = 3;
  }
  // One entry per (operation type, memory space) combination.
  // NOTE(review): uniqueness of that pair is not enforced by the schema.
  repeated MemoryAccessed memory_accessed_breakdown = 19;
  // Total dma stall time in picoseconds.
  uint64 dma_stall_ps = 10;
  // The data layout for this op. Only set for convolution ops for now.
  LayoutAnalysis layout = 14;
  // Deduplicated HLO name for this op. Not set for TF ops.
  string deduplicated_name = 15;
  // Children of the op. e.g. fused ops if this op is fusion.
  // OpMetricsDb in turn holds repeated OpMetrics, so this type is recursive;
  // nesting depth is not bounded by the schema.
  OpMetricsDb children = 16;
  // Field numbers that are no longer in use; they must not be reused.
  reserved 4, 8, 9;
}

// Statistics about the various precision used in computation.
message PrecisionStats {
  // Amount of time spent on 16-bit computation (in ps).
  uint64 compute_16bit_ps = 1;
  // Amount of time spent on 32-bit computation (in ps).
  uint64 compute_32bit_ps = 2;
}

// A database for OpMetrics.
// Next ID: 14
message OpMetricsDb {
  // A bunch of OpMetrics.
  repeated OpMetrics metrics_db = 10;
  // The total host infeed-enqueue duration in picoseconds.
  uint64 total_host_infeed_enq_duration_ps = 2;
  // The total of the difference between the start times of two
  // consecutive infeed-enqueues (per host) in picoseconds.
  uint64 total_host_infeed_enq_start_timestamp_ps_diff = 3;
  // The total time in picoseconds.
  uint64 total_time_ps = 11;
  // The total time incurred by OPs in picoseconds.
  uint64 total_op_time_ps = 12;
  // Precision-related stats.
  PrecisionStats precision_stats = 13;
  // Field numbers that are no longer in use; they must not be reused.
  reserved 1, 4, 5, 6, 7, 8, 9;
}