1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // Defines the CUDATimer type - the CUDA-specific implementation of the generic 17 // StreamExecutor Timer interface. 18 19 #ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_ 20 #define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_ 21 22 #include "tensorflow/stream_executor/stream_executor_internal.h" 23 #include "tensorflow/stream_executor/cuda/cuda_driver.h" 24 #include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h" 25 26 namespace perftools { 27 namespace gputools { 28 namespace cuda { 29 30 class CUDAExecutor; 31 class CUDAStream; 32 33 // Wraps a pair of CUevents in order to satisfy the platform-independent 34 // TimerInferface -- both a start and a stop event are present which may be 35 // recorded in a stream. 36 class CUDATimer : public internal::TimerInterface { 37 public: CUDATimer(CUDAExecutor * parent)38 explicit CUDATimer(CUDAExecutor *parent) 39 : parent_(parent), start_event_(nullptr), stop_event_(nullptr) {} 40 41 // Note: teardown is explicitly handled in this API by a call to 42 // StreamExecutor::DeallocateTimer(), which invokes Destroy(). ~CUDATimer()43 ~CUDATimer() override {} 44 45 // Allocates the platform-specific pieces of the timer, called as part of 46 // StreamExecutor::AllocateTimer(). 47 bool Init(); 48 49 // Deallocates the platform-specific pieces of the timer, called as part of 50 // StreamExecutor::DeallocateTimer(). 51 void Destroy(); 52 53 // Records the "timer start" event at the current point in the stream. 54 bool Start(CUDAStream *stream); 55 56 // Records the "timer stop" event at the current point in the stream. 57 bool Stop(CUDAStream *stream); 58 59 // Returns the elapsed time, in milliseconds, between the start and stop 60 // events. 61 float GetElapsedMilliseconds() const; 62 63 // See perftools::gputools::Timer::Microseconds(). 64 // TODO(leary) make this into an error code interface... Microseconds()65 uint64 Microseconds() const override { 66 return GetElapsedMilliseconds() * 1e3; 67 } 68 69 // See perftools::GPUTools::Timer::Nanoseconds(). Nanoseconds()70 uint64 Nanoseconds() const override { return GetElapsedMilliseconds() * 1e6; } 71 72 private: 73 CUDAExecutor *parent_; 74 CUevent start_event_; // Event recorded to indicate the "start" timestamp 75 // executing in a stream. 76 CUevent stop_event_; // Event recorded to indicate the "stop" timestamp 77 // executing in a stream. 78 }; 79 80 } // namespace cuda 81 } // namespace gputools 82 } // namespace perftools 83 84 #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_ 85