// Profiler messages describing input-pipeline analysis results and
// bottleneck classification.
syntax = "proto3";

package tensorflow.profiler;

import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/diagnostics.proto";

// Generic hardware bottleneck.
//
// Carries the split of step time into input, output, idle, and compute
// percentages, together with a classification string and a human-readable
// statement for each analyzed source of overhead.
message BottleneckAnalysis {
  // Percentage of step time that is spent on input.
  double input_percent = 7;

  // Percentage of step time that is spent on output.
  double output_percent = 8;

  // Percentage of step time that is idle for non-I/O-related reasons.
  double idle_percent = 9;

  // Percentage of step time that is spent on compute.
  double compute_percent = 10;

  // Indicates whether input is a bottleneck. Possible values: "host",
  // "device", "both", or "unknown".
  string input_classification = 1;

  // A human-readable description of the input bottleneck.
  string input_statement = 2;

  // Indicates whether kernel launching is a bottleneck. Possible values:
  // "no", "moderate", "high".
  string kernel_launch_classification = 3;

  // A human-readable description of the kernel-launching overhead.
  string kernel_launch_statement = 4;

  // Indicates whether "all other" time is a bottleneck. Possible values:
  // "no", "moderate", "high".
  string all_other_classification = 5;

  // A human-readable description of the "all other" overhead.
  string all_other_statement = 6;

  // Indicates whether device collective communication is a bottleneck.
  // Possible values: "no", "moderate", "high".
  string device_collectives_classification = 11;

  // A human-readable description of the device collective communication
  // overhead.
  string device_collectives_statement = 12;
}

// Summary statistics of a measured quantity.
// Used for both step duration and Op duration.
//
// The unit is that of the quantity being summarized; within this file the
// message is used for durations in milliseconds and for percentages.
message StepSummary {
  // Average of the summarized values.
  double average = 1;

  // Standard deviation of the summarized values.
  double standard_deviation = 2;

  // Smallest of the summarized values.
  double minimum = 3;

  // Largest of the summarized values.
  double maximum = 4;
}

// Per-step details on generic hardware.
message PerGenericStepDetails {
  // Field number 4 is reserved and must not be reused.
  reserved 4;

  // The step number of a step.
  int32 step_number = 1;

  // The step name.
  string step_name = 14;

  // The step time (in ms).
  double step_time_ms = 2;

  // Breakdown of the step time in different event categories. All of the
  // fields below are durations in ms.

  // The unknown time (in ms).
  double unknown_time_ms = 3;

  // The time (in ms) in which the host is waiting for input data to be ready.
  double host_wait_input_ms = 11;

  // The time (in ms) in which the host is sending input data to the device.
  // Total input time = host_wait_input_ms + host_to_device_ms.
  double host_to_device_ms = 12;

  // The output time (in ms).
  double output_ms = 5;

  // The device-compute time (in ms).
  double device_compute_ms = 6;

  // The device-to-device communication time (in ms).
  double device_to_device_ms = 7;

  // The device time spent on collective communications (in ms).
  double device_collectives_ms = 13;

  // The host-compute time (in ms).
  double host_compute_ms = 8;

  // The host-prepare time (in ms).
  double host_prepare_ms = 9;

  // The time spent on compiling (in ms).
  double host_compile_ms = 10;
}

// Breakdown of the input processing time into categories. All durations are
// in microseconds.
message InputTimeBreakdown {
  // Time spent on demanded file read in microseconds.
  double demanded_file_read_us = 1;

  // Time spent on advanced file read in microseconds.
  double advanced_file_read_us = 2;

  // Time spent on data preprocessing in microseconds.
  double preprocessing_us = 3;

  // The infeed enqueue time in microseconds.
  double enqueue_us = 4;

  // Non-enqueue input time, in microseconds, that cannot be broken down any
  // further. This entry is for the situation where the input pipeline is not
  // instrumented.
  double unclassified_non_enqueue_us = 5;
}

// Details of one input Op, with times accumulated over all of its
// occurrences.
message InputOpDetails {
  // The Op's name.
  string op_name = 1;

  // The number of occurrences.
  uint64 count = 2;

  // Time (accumulated over all occurrences) in milliseconds.
  double time_in_ms = 3;

  // Time (accumulated over all occurrences) as a percentage of the total
  // input processing time.
  double time_in_percent = 4;

  // Self time (accumulated over all occurrences) in milliseconds.
  double self_time_in_ms = 5;

  // Self time (accumulated over all occurrences) as a percentage of the
  // total input processing time.
  double self_time_in_percent = 6;

  // Possible categories: "Enqueue", "Advanced file read",
  // "Demanded file read", "Preprocessing", "Unknown".
  string category = 7;
}

// Recommendation produced by the input-pipeline analysis: detailed advice,
// a bottleneck analysis, and a suggested next step.
message InputPipelineAnalysisRecommendation {
  // A list of detailed recommendations.
  repeated string details = 1;

  // An analysis of different types of bottlenecks. Can be unpacked into a
  // BottleneckAnalysis.
  google.protobuf.Any bottleneck_analysis = 2;

  // A suggested step to take next.
  string summary_next_step = 3;
}

// Breakdown of the step time into categories, where each category is
// reported as a StepSummary of per-step times in ms.
message GenericStepTimeBreakdown {
  // Field number 2 is reserved and must not be reused.
  reserved 2;

  // Summary of all unknown time as a part of step in ms.
  StepSummary unknown_time_ms_summary = 1;

  // Summary of all host-wait-input time as a part of step in ms.
  StepSummary host_wait_input_ms_summary = 9;

  // Summary of all host-to-device time as a part of step in ms.
  StepSummary host_to_device_ms_summary = 10;

  // Summary of all input time as a part of step in ms.
  StepSummary input_ms_summary = 11;

  // Summary of all output time as a part of step in ms.
  StepSummary output_ms_summary = 3;

  // Summary of all device-compute time as a part of step in ms.
  StepSummary device_compute_ms_summary = 4;

  // Summary of all device-to-device time as a part of step in ms.
  StepSummary device_to_device_ms_summary = 5;

  // Summary of all device-collectives time as a part of step in ms.
  StepSummary device_collectives_ms_summary = 12;

  // Summary of all host-compute time as a part of step in ms.
  StepSummary host_compute_ms_summary = 6;

  // Summary of all host-prepare time as a part of step in ms.
  StepSummary host_prepare_ms_summary = 7;

  // Summary of all compilation time as a part of step in ms.
  StepSummary host_compile_ms_summary = 8;
}

// Result of the input-pipeline analysis: step-time summaries, the split of
// step time into input/output/idle/compute, per-step details, input Op
// details, a recommendation, and diagnostics.
message InputPipelineAnalysisResult {
  // Field numbers 1 and 10 are reserved and must not be reused.
  reserved 1, 10;

  // Hardware type.
  string hardware_type = 9;

  // Summary of all step duration across all cores.
  StepSummary step_time_summary = 2;

  // Summary of all input-related stall as percentage of step duration.
  StepSummary input_percent_summary = 3;

  // Percentage of step time that is waiting for input.
  double input_percent = 11;

  // Percentage of step time that is doing output.
  double output_percent = 13;

  // Percentage of step time that is idle for non-I/O-related reasons.
  double idle_percent = 14;

  // Percentage of step time that is doing compute.
  double compute_percent = 15;

  // Details of each step. Each element can be unpacked into a
  // PerGenericStepDetails.
  repeated google.protobuf.Any step_details = 4;

  // The breakdown of the input processing time.
  InputTimeBreakdown input_time_breakdown = 5;

  // Details of each input Op executed.
  repeated InputOpDetails input_op_details = 6;

  // Recommendation for next steps to users.
  InputPipelineAnalysisRecommendation recommendation = 7;

  // Breakdown of the step time. Can be unpacked into a
  // GenericStepTimeBreakdown.
  google.protobuf.Any step_time_breakdown = 8;

  // Error and warning messages for diagnosing profiling issues.
  Diagnostics diagnostics = 12;
}
