• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // Defines the CUDATimer type - the CUDA-specific implementation of the generic
17 // StreamExecutor Timer interface.
18 
19 #ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_
20 #define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_
21 
22 #include "tensorflow/stream_executor/stream_executor_internal.h"
23 #include "tensorflow/stream_executor/cuda/cuda_driver.h"
24 #include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
25 
26 namespace perftools {
27 namespace gputools {
28 namespace cuda {
29 
30 class CUDAExecutor;
31 class CUDAStream;
32 
33 // Wraps a pair of CUevents in order to satisfy the platform-independent
34 // TimerInferface -- both a start and a stop event are present which may be
35 // recorded in a stream.
36 class CUDATimer : public internal::TimerInterface {
37  public:
CUDATimer(CUDAExecutor * parent)38   explicit CUDATimer(CUDAExecutor *parent)
39       : parent_(parent), start_event_(nullptr), stop_event_(nullptr) {}
40 
41   // Note: teardown is explicitly handled in this API by a call to
42   // StreamExecutor::DeallocateTimer(), which invokes Destroy().
~CUDATimer()43   ~CUDATimer() override {}
44 
45   // Allocates the platform-specific pieces of the timer, called as part of
46   // StreamExecutor::AllocateTimer().
47   bool Init();
48 
49   // Deallocates the platform-specific pieces of the timer, called as part of
50   // StreamExecutor::DeallocateTimer().
51   void Destroy();
52 
53   // Records the "timer start" event at the current point in the stream.
54   bool Start(CUDAStream *stream);
55 
56   // Records the "timer stop" event at the current point in the stream.
57   bool Stop(CUDAStream *stream);
58 
59   // Returns the elapsed time, in milliseconds, between the start and stop
60   // events.
61   float GetElapsedMilliseconds() const;
62 
63   // See perftools::gputools::Timer::Microseconds().
64   // TODO(leary) make this into an error code interface...
Microseconds()65   uint64 Microseconds() const override {
66     return GetElapsedMilliseconds() * 1e3;
67   }
68 
69   // See perftools::GPUTools::Timer::Nanoseconds().
Nanoseconds()70   uint64 Nanoseconds() const override { return GetElapsedMilliseconds() * 1e6; }
71 
72  private:
73   CUDAExecutor *parent_;
74   CUevent start_event_;  // Event recorded to indicate the "start" timestamp
75                          // executing in a stream.
76   CUevent stop_event_;   // Event recorded to indicate the "stop" timestamp
77                          // executing in a stream.
78 };
79 
80 }  // namespace cuda
81 }  // namespace gputools
82 }  // namespace perftools
83 
84 #endif  // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_
85