• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // This file defines the StreamExecutor trace listener, used for inserting
17 // non-device-specific instrumentation into the StreamExecutor.
18 #ifndef TENSORFLOW_STREAM_EXECUTOR_TRACE_LISTENER_H_
19 #define TENSORFLOW_STREAM_EXECUTOR_TRACE_LISTENER_H_
20 
21 #include "tensorflow/stream_executor/device_memory.h"
22 #include "tensorflow/stream_executor/kernel.h"
23 #include "tensorflow/stream_executor/launch_dim.h"
24 #include "tensorflow/stream_executor/lib/status.h"
25 
26 namespace stream_executor {
27 
28 class Stream;
29 
30 // Traces StreamExecutor PIMPL-level events.
31 // The few StreamExecutor interfaces that are synchronous have both Begin and
32 // Complete versions of their trace calls. Asynchronous operations only have
33 // Submit calls, as execution of the underlying operations is device-specific.
34 // As all tracing calls mirror StreamExecutor routines, documentation here is
35 // minimal.
36 //
37 // All calls have default implementations that perform no work; subclasses
38 // should override functionality of interest. Keep in mind that these routines
39 // are not called on a dedicated thread, so callbacks should execute quickly.
40 //
41 // Note: This API is constructed on an as-needed basis. Users should add
42 // support for further StreamExecutor operations as required. By enforced
43 // convention (see SCOPED_TRACE in stream_executor_pimpl.cc), synchronous
44 // tracepoints should be named NameBegin and NameComplete.
45 class TraceListener {
46  public:
~TraceListener()47   virtual ~TraceListener() {}
48 
LaunchSubmit(Stream * stream,const ThreadDim & thread_dims,const BlockDim & block_dims,const KernelBase & kernel,const KernelArgsArrayBase & args)49   virtual void LaunchSubmit(Stream* stream, const ThreadDim& thread_dims,
50                             const BlockDim& block_dims,
51                             const KernelBase& kernel,
52                             const KernelArgsArrayBase& args) {}
53 
SynchronousMemcpyH2DBegin(int64 correlation_id,const void * host_src,int64 size,DeviceMemoryBase * gpu_dst)54   virtual void SynchronousMemcpyH2DBegin(int64 correlation_id,
55                                          const void* host_src, int64 size,
56                                          DeviceMemoryBase* gpu_dst) {}
SynchronousMemcpyH2DComplete(int64 correlation_id,const port::Status * result)57   virtual void SynchronousMemcpyH2DComplete(int64 correlation_id,
58                                             const port::Status* result) {}
59 
SynchronousMemcpyD2HBegin(int64 correlation_id,const DeviceMemoryBase & gpu_src,int64 size,void * host_dst)60   virtual void SynchronousMemcpyD2HBegin(int64 correlation_id,
61                                          const DeviceMemoryBase& gpu_src,
62                                          int64 size, void* host_dst) {}
SynchronousMemcpyD2HComplete(int64 correlation_id,const port::Status * result)63   virtual void SynchronousMemcpyD2HComplete(int64 correlation_id,
64                                             const port::Status* result) {}
65 
BlockHostUntilDoneBegin(int64 correlation_id,Stream * stream)66   virtual void BlockHostUntilDoneBegin(int64 correlation_id, Stream* stream) {}
BlockHostUntilDoneComplete(int64 correlation_id,const port::Status * result)67   virtual void BlockHostUntilDoneComplete(int64 correlation_id,
68                                           const port::Status* result) {}
69 };
70 
71 }  // namespace stream_executor
72 
73 #endif  // TENSORFLOW_STREAM_EXECUTOR_TRACE_LISTENER_H_
74