• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_STEP_STATS_COLLECTOR_H_
16 #define TENSORFLOW_CORE_COMMON_RUNTIME_STEP_STATS_COLLECTOR_H_
17 
18 #include <memory>
19 #include <unordered_map>
20 #include <vector>
21 #include "tensorflow/core/framework/step_stats.pb.h"
22 #include "tensorflow/core/framework/tensor_reference.h"
23 #include "tensorflow/core/lib/gtl/inlined_vector.h"
24 #include "tensorflow/core/platform/env.h"
25 #include "tensorflow/core/platform/mutex.h"
26 #include "tensorflow/core/platform/thread_annotations.h"
27 #include "tensorflow/core/platform/types.h"
28 
29 namespace tensorflow {
30 
31 class Allocator;
32 class AllocatorMemoryUsed;
33 class CostModelManager;
34 class Graph;
35 class NodeDef;
36 class NodeExecStats;
37 class OpKernelContext;
38 class StepStats;
39 class StepStatsCollector;
40 class Tensor;
41 class TrackingAllocator;
42 
43 // Statistics collection interface for individual node execution.
44 //
45 // See `NodeExecStatsWrapper` for a concrete implementation of this interface
46 // that interfaces with the `Session` layer.
47 class NodeExecStatsInterface {
48  public:
~NodeExecStatsInterface()49   virtual ~NodeExecStatsInterface() {}
50 
51   // Called when the statistics collection for the node has finished. Once this
52   // method is called, the caller should not make assumptions about the validity
53   // of this object.
54   virtual void Done(const string& device) = 0;
55 
56   // Called immediately after this node starts being processed by the executor.
57   virtual void RecordExecutorStarted() = 0;
58 
59   // Called immediately before this node's `Compute()` or `ComputeAsync()`
60   // method is called.
61   virtual void RecordComputeStarted() = 0;
62 
63   // Called immediately after this node's `Compute()` method returned (or, for
64   // asynchronous operations, the callback passed to its `ComputeAsync()` method
65   // was called).
66   virtual void RecordComputeEnded() = 0;
67 
68   // Called immediately after this executor finishes processing this node.
69   virtual void RecordExecutorEnded() = 0;
70 
71   // Returns `true` if this object should track memory allocations.
72   virtual bool TrackAllocations() const = 0;
73 
74   // Records information about the memory allocated during the execution of this
75   // node.
76   //
77   // Takes ownership of any `TrackingAllocator` objects stored in `ctx`.
78   virtual void SetMemory(OpKernelContext* ctx) = 0;
79 
80   // Records information about the tensor produced by this node at the given
81   // output slot.
82   virtual void SetOutput(int slot, const Tensor* tensor) = 0;
83 
84   // Records information about the tensors that were accessed during the
85   // execution of this node.
86   virtual void SetReferencedTensors(const TensorReferenceVector& tensors) = 0;
87 
88   // Records the absolute time in nanoseconds at which this node became
89   // runnable (i.e. was scheduled for execution).
90   virtual void SetScheduled(int64 nanos) = 0;
91 };
92 
93 // Wraps NodeExecStats and adds allocation to it.
94 class NodeExecStatsWrapper : public NodeExecStatsInterface {
95  public:
96   // Does not take ownership of `node` or `step_stats_collector`.
97   NodeExecStatsWrapper(const NodeDef* node,
98                        StepStatsCollector* step_stats_collector);
99 
100   // Takes ownership of 'stats' but not `node` or `step_stats_collector`.
101   NodeExecStatsWrapper(std::unique_ptr<NodeExecStats> stats,
102                        const NodeDef* node,
103                        StepStatsCollector* step_stats_collector);
104 
105   // Destructor calls Finalize() to release the TrackingAllocators.
~NodeExecStatsWrapper()106   ~NodeExecStatsWrapper() override { Finalize(); }
107 
108   void Done(const string& device) override;
109   void RecordExecutorStarted() override;
110   void RecordComputeStarted() override;
111   void RecordComputeEnded() override;
112   void RecordExecutorEnded() override;
TrackAllocations()113   bool TrackAllocations() const override { return true; }
114   void SetMemory(OpKernelContext* ctx) override;
115   void SetOutput(int slot, const Tensor* tensor) override;
116   void SetReferencedTensors(const TensorReferenceVector& tensors) override;
117   void SetScheduled(int64 nanos) override;
118 
119  private:
120   friend class StepStatsCollector;
121 
stats()122   NodeExecStats* stats() { return stats_.get(); }
123 
124   // Populates stats_ and releases TrackingAllocator.
125   void Finalize();
126 
127   // Does not take ownership of the `allocator`.
128   // Takes ownership of `tracking_allocator`.
129   void AddAllocation(Allocator* allocator,
130                      TrackingAllocator* tracking_allocator);
131 
132   gtl::InlinedVector<std::pair<AllocatorMemoryUsed*, TrackingAllocator*>, 2>
133       allocations_;
134   std::unique_ptr<NodeExecStats> stats_;
135   const NodeDef* const node_;                       // Not owned.
136   StepStatsCollector* const step_stats_collector_;  // Not owned.
137 };
138 
139 // Statistics collection interface for step execution.
140 //
141 // See `StepStatsCollector` for a concrete implementation of this interface
142 // that interfaces with the `Session` layer.
143 class StepStatsCollectorInterface {
144  public:
~StepStatsCollectorInterface()145   virtual ~StepStatsCollectorInterface() {}
146 
147   // Creates an instance of `NodeExecStatsInterface` that should be used for
148   // collecting statistics about individual node execution.
149   virtual NodeExecStatsInterface* CreateNodeExecStats(const NodeDef* node) = 0;
150 
151   // Generates a string reporting the currently used memory based
152   // on ResourceExhausted OOM `err` message.
153   // `err` message needs to contain device name and allocator name, e.g.:
154   // "ResourceExhaustedError: OOM when allocating tensor ...
155   // on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc"
156   virtual string ReportAllocsOnResourceExhausted(const string& err) = 0;
157 };
158 
159 // StepStatsCollector manages the collection of a StepStats object.
160 // The StepStats object holds multiple DeviceStats.
161 // Each DeviceStats object holds multiple NodeExecStats.
162 class StepStatsCollector : public StepStatsCollectorInterface {
163  public:
164   // Does not take ownership of `step_stats`.
165   explicit StepStatsCollector(StepStats* step_stats);
166 
167   // BuildCostModel builds or updates a CostModel managed by cost_model_manager,
168   // using the currently collected DeviceStats associated with the devices in
169   // device_map.
170   void BuildCostModel(
171       CostModelManager* cost_model_manager,
172       const std::unordered_map<string, const Graph*>& device_map);
173 
174   // Saves node statistics to the DeviceStats object associated with device.
175   // Should be called before Finalize.
176   void Save(const string& device, NodeExecStats* node_stats_pb);
177   void Save(const string& device, NodeExecStatsWrapper* node_stats);
178 
179   // Saves thread name.
180   void SaveThreadName(const string& device, const uint32 thread_id,
181                       const string& thread_name);
182 
183   NodeExecStatsInterface* CreateNodeExecStats(const NodeDef* node) override;
184   string ReportAllocsOnResourceExhausted(const string& err) override;
185 
186   // The following 2 Finalize methods populate the StepStats passed
187   // from the constructor. Calling it more than once won't have any effect.
188   // User shouldn't call Save() methods after Finalize.
189   void Finalize();
190   // swaps the content of StepStats* from constructor with 'ss'.
191   void FinalizeAndSwap(StepStats* step_stats);
192 
193  private:
194   // TODO(suharshs): Make this configurable if its not possible to find a value
195   // that works for all cases.
196   static const uint64 kMaxCollectedNodes = 1 << 20;
197 
198   typedef std::vector<std::unique_ptr<NodeExecStatsWrapper>> NodeStatsVector;
199   typedef std::unordered_map<uint32, string> ThreadNamesMap;
200 
201   void FinalizeInternal() EXCLUSIVE_LOCKS_REQUIRED(mu_);
202 
203   mutex mu_;
204   bool finalized_ GUARDED_BY(mu_);
205   std::unordered_map<string, NodeStatsVector> dev_stats_ GUARDED_BY(mu_);
206   std::unordered_map<string, ThreadNamesMap> thread_names_ GUARDED_BY(mu_);
207   StepStats* step_stats_ GUARDED_BY(mu_);
208   uint64 collected_nodes_ GUARDED_BY(mu_) = 0;
209 };
210 
211 }  // namespace tensorflow
212 
213 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_STEP_STATS_COLLECTOR_H_
214