1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // This file defines the StreamExecutor trace listener, used for inserting 17 // non-device-specific instrumentation into the StreamExecutor. 18 #ifndef TENSORFLOW_STREAM_EXECUTOR_TRACE_LISTENER_H_ 19 #define TENSORFLOW_STREAM_EXECUTOR_TRACE_LISTENER_H_ 20 21 #include "tensorflow/stream_executor/device_memory.h" 22 #include "tensorflow/stream_executor/kernel.h" 23 #include "tensorflow/stream_executor/launch_dim.h" 24 #include "tensorflow/stream_executor/lib/status.h" 25 26 namespace stream_executor { 27 28 class Stream; 29 30 // Traces StreamExecutor PIMPL-level events. 31 // The few StreamExecutor interfaces that are synchronous have both Begin and 32 // Complete versions of their trace calls. Asynchronous operations only have 33 // Submit calls, as execution of the underlying operations is device-specific. 34 // As all tracing calls mirror StreamExecutor routines, documentation here is 35 // minimal. 36 // 37 // All calls have default implementations that perform no work; subclasses 38 // should override functionality of interest. Keep in mind that these routines 39 // are not called on a dedicated thread, so callbacks should execute quickly. 40 // 41 // Note: This API is constructed on an as-needed basis. Users should add 42 // support for further StreamExecutor operations as required. By enforced 43 // convention (see SCOPED_TRACE in stream_executor_pimpl.cc), synchronous 44 // tracepoints should be named NameBegin and NameComplete. 45 class TraceListener { 46 public: ~TraceListener()47 virtual ~TraceListener() {} 48 LaunchSubmit(Stream * stream,const ThreadDim & thread_dims,const BlockDim & block_dims,const KernelBase & kernel,const KernelArgsArrayBase & args)49 virtual void LaunchSubmit(Stream* stream, const ThreadDim& thread_dims, 50 const BlockDim& block_dims, 51 const KernelBase& kernel, 52 const KernelArgsArrayBase& args) {} 53 SynchronousMemcpyH2DBegin(int64 correlation_id,const void * host_src,int64 size,DeviceMemoryBase * gpu_dst)54 virtual void SynchronousMemcpyH2DBegin(int64 correlation_id, 55 const void* host_src, int64 size, 56 DeviceMemoryBase* gpu_dst) {} SynchronousMemcpyH2DComplete(int64 correlation_id,const port::Status * result)57 virtual void SynchronousMemcpyH2DComplete(int64 correlation_id, 58 const port::Status* result) {} 59 SynchronousMemcpyD2HBegin(int64 correlation_id,const DeviceMemoryBase & gpu_src,int64 size,void * host_dst)60 virtual void SynchronousMemcpyD2HBegin(int64 correlation_id, 61 const DeviceMemoryBase& gpu_src, 62 int64 size, void* host_dst) {} SynchronousMemcpyD2HComplete(int64 correlation_id,const port::Status * result)63 virtual void SynchronousMemcpyD2HComplete(int64 correlation_id, 64 const port::Status* result) {} 65 BlockHostUntilDoneBegin(int64 correlation_id,Stream * stream)66 virtual void BlockHostUntilDoneBegin(int64 correlation_id, Stream* stream) {} BlockHostUntilDoneComplete(int64 correlation_id,const port::Status * result)67 virtual void BlockHostUntilDoneComplete(int64 correlation_id, 68 const port::Status* result) {} 69 }; 70 71 } // namespace stream_executor 72 73 #endif // TENSORFLOW_STREAM_EXECUTOR_TRACE_LISTENER_H_ 74