1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/profiler/lib/profiler_session.h"
17 #include <cstddef>
18 #include <string>
19 #include "tensorflow/core/common_runtime/eager/context.h"
20 #include "tensorflow/core/lib/core/error_codes.pb.h"
21 #include "tensorflow/core/platform/env.h"
22 #include "tensorflow/core/platform/mutex.h"
23 #include "tensorflow/core/platform/types.h"
24 #include "tensorflow/core/profiler/internal/gpu/tracer.h"
25 #include "tensorflow/core/profiler/internal/runtime/eager_profiler.h"
26 #include "tensorflow/core/profiler/trace_events.pb.h"
27 #include "tensorflow/core/protobuf/config.pb.h"
28
29 namespace tensorflow {
30
31 namespace {
32
// Tracks whether there is an active ProfilerSession.
// Prevents another ProfilerSession from creating ProfilerInterface(s), as they
// use singletons that do not allow concurrent profiling requests (e.g.,
// DeviceTracer).
//
// A session claims the flag with exchange(true) when it is constructed and
// releases it with store(false) once it has been serialized or destroyed.
// Brace initialization is used rather than ATOMIC_VAR_INIT, which is
// deprecated as of C++20.
std::atomic<bool> session_active{false};
38
AssignLanes(RunMetadata * run_metadata)39 void AssignLanes(RunMetadata* run_metadata) {
40 for (size_t device_id = 0;
41 device_id < run_metadata->step_stats().dev_stats_size(); ++device_id) {
42 auto* device_stats =
43 run_metadata->mutable_step_stats()->mutable_dev_stats(device_id);
44 if (device_stats->thread_names_size() > 0 ||
45 device_stats->node_stats_size() == 0) {
46 continue;
47 }
48 std::vector<uint64> lanes;
49 for (auto ns = device_stats->mutable_node_stats()->rbegin();
50 ns != device_stats->mutable_node_stats()->rend(); ns++) {
51 uint64 end_micros = ns->all_start_micros() + ns->all_end_rel_micros();
52 bool found_lane = false;
53 for (size_t l = 0; l < lanes.size(); l++) {
54 if (end_micros <= lanes[l]) {
55 ns->set_thread_id(l);
56 found_lane = true;
57 lanes[l] = ns->all_start_micros();
58 break;
59 }
60 }
61 if (!found_lane) {
62 ns->set_thread_id(lanes.size());
63 lanes.push_back(ns->all_start_micros());
64 }
65 }
66 }
67 }
68
ConvertRunMetadataToTraceEvent(RunMetadata * run_metadata,profiler::Trace * trace,const uint64 profile_start_time_micros)69 void ConvertRunMetadataToTraceEvent(RunMetadata* run_metadata,
70 profiler::Trace* trace,
71 const uint64 profile_start_time_micros) {
72 AssignLanes(run_metadata);
73 auto trace_devices = trace->mutable_devices();
74
75 for (size_t device_id = 0;
76 device_id < run_metadata->step_stats().dev_stats_size(); ++device_id) {
77 // Create device
78 auto* device_stats =
79 run_metadata->mutable_step_stats()->mutable_dev_stats(device_id);
80 profiler::Device device;
81 device.set_name(device_stats->device());
82 device.set_device_id(device_id);
83 profiler::Resource resource;
84 resource.set_name("0");
85 resource.set_resource_id(0);
86 (*device.mutable_resources())[0] = resource;
87 for (const auto& thread_name : device_stats->thread_names()) {
88 profiler::Resource resource;
89 resource.set_resource_id(thread_name.first);
90 resource.set_name(thread_name.second);
91 (*device.mutable_resources())[thread_name.first] = resource;
92 }
93 (*trace_devices)[device_id] = device;
94
95 // Emit events.
96 for (auto node :
97 run_metadata->step_stats().dev_stats(device_id).node_stats()) {
98 if (node.all_start_micros() < profile_start_time_micros) {
99 continue;
100 }
101 auto* event = trace->add_trace_events();
102 auto* args = event->mutable_args();
103 event->set_device_id(device_id);
104 event->set_resource_id(node.thread_id());
105 event->set_name(node.node_name());
106 event->set_timestamp_ps(
107 (node.all_start_micros() - profile_start_time_micros) *
108 EnvTime::kMicrosToPicos);
109 event->set_duration_ps(node.all_end_rel_micros() *
110 EnvTime::kMicrosToPicos);
111 (*args)["label"] = node.timeline_label();
112 }
113 }
114
115 // TODO(fishx): Convert allocation data as well.
116 }
117
118 } // namespace
119
Create(ProfilerContext * const context)120 /*static*/ std::unique_ptr<ProfilerSession> ProfilerSession::Create(
121 ProfilerContext* const context) {
122 return absl::WrapUnique(new ProfilerSession(context));
123 }
124
Status()125 Status ProfilerSession::Status() {
126 mutex_lock l(mutex_);
127 return status_;
128 }
129
SerializeToString(string * content)130 Status ProfilerSession::SerializeToString(string* content) {
131 mutex_lock l(mutex_);
132 if (!status_.ok()) return status_;
133 for (auto& profiler : profilers_) {
134 profiler->Stop().IgnoreError();
135 }
136 RunMetadata run_metadata;
137 for (auto& profiler : profilers_) {
138 profiler->CollectData(&run_metadata).IgnoreError();
139 }
140
141 if (active_) {
142 // Allow another session to start.
143 session_active.store(false);
144 active_ = false;
145 }
146
147 profiler::Trace trace;
148
149 ConvertRunMetadataToTraceEvent(&run_metadata, &trace, start_time_micros_);
150
151 trace.SerializeToString(content);
152 return Status::OK();
153 }
154
ProfilerSession(ProfilerContext * const context)155 ProfilerSession::ProfilerSession(ProfilerContext* const context)
156 : active_(!session_active.exchange(true)),
157 start_time_micros_(Env::Default()->NowNanos() / EnvTime::kMicrosToNanos) {
158 if (!active_) {
159 status_ = tensorflow::Status(tensorflow::error::Code::UNAVAILABLE,
160 "Another profiling session is active.");
161 return;
162 }
163
164 LOG(INFO) << "Profile Session started.";
165
166 if (context->eager_context != nullptr) {
167 profilers_.push_back(tensorflow::profiler::runtime::EagerProfiler::Create(
168 context->eager_context));
169 }
170 profilers_.push_back(tensorflow::profiler::gpu::Tracer::Create());
171
172 status_ = Status::OK();
173
174 for (auto& profiler : profilers_) {
175 profiler->Start().IgnoreError();
176 }
177 }
178
~ProfilerSession()179 ProfilerSession::~ProfilerSession() {
180 for (auto& profiler : profilers_) {
181 profiler->Stop().IgnoreError();
182 }
183
184 if (active_) {
185 // Allow another session to start.
186 session_active.store(false);
187 }
188 }
189
190 } // namespace tensorflow
191