• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
// Declares the HostExecutor class, which is a CPU-only implementation of
// the StreamExecutor interface. For now, this is used for testing and to
// examine the performance of host-based StreamExecutor code.
19 #ifndef TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_
20 #define TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_
21 
22 #include "tensorflow/stream_executor/blas.h"
23 #include "tensorflow/stream_executor/host/host_stream.h"
24 #include "tensorflow/stream_executor/host/host_timer.h"
25 #include "tensorflow/stream_executor/lib/error.h"
26 #include "tensorflow/stream_executor/lib/status.h"
27 #include "tensorflow/stream_executor/rng.h"
28 #include "tensorflow/stream_executor/stream_executor.h"
29 #include "tensorflow/stream_executor/stream_executor_internal.h"
30 
31 namespace stream_executor {
32 namespace host {
33 
// An implementation of StreamExecutor that does no communication or interaction
// with a device, but DOES perform memory operations backed by the host.
// Plugin routines (RNG, BLAS) are also supported and functional.
// Kernel invocations will fail, but host callbacks may be enqueued on this
// executor and its associated stream, and should follow standard ordering
// semantics.
//
// This is useful for evaluating the performance of host-based or fallback
// routines executed under the context of a GPU executor.
// See stream_executor.h for descriptions of the operations below.
44 class HostExecutor : public internal::StreamExecutorInterface {
45  public:
46   explicit HostExecutor(const PluginConfig &plugin_config);
47   ~HostExecutor() override;
48 
49   // The stack size used for host streams can be set via
50   // device_options.non_portable_tags["host_stack_size"].
51   port::Status Init(int device_ordinal, DeviceOptions device_options) override;
52 
GetKernel(const MultiKernelLoaderSpec & spec,KernelBase * kernel)53   port::Status GetKernel(const MultiKernelLoaderSpec &spec,
54                          KernelBase *kernel) override {
55     return port::UnimplementedError("Not Implemented");
56   }
Launch(Stream * stream,const ThreadDim & thread_dims,const BlockDim & block_dims,const KernelBase & kernel,const KernelArgsArrayBase & args)57   port::Status Launch(Stream *stream, const ThreadDim &thread_dims,
58                       const BlockDim &block_dims, const KernelBase &kernel,
59                       const KernelArgsArrayBase &args) override {
60     return port::UnimplementedError("Not Implemented");
61   }
62 
63   DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override;
64   void *GetSubBuffer(DeviceMemoryBase *parent, uint64 offset_bytes,
65                      uint64 size_bytes) override;
66   void Deallocate(DeviceMemoryBase *mem) override;
67 
HostMemoryAllocate(uint64 size)68   void *HostMemoryAllocate(uint64 size) override { return new char[size]; }
HostMemoryDeallocate(void * mem)69   void HostMemoryDeallocate(void *mem) override {
70     delete[] static_cast<char *>(mem);
71   }
HostMemoryRegister(void * mem,uint64 size)72   bool HostMemoryRegister(void *mem, uint64 size) override { return true; }
HostMemoryUnregister(void * mem)73   bool HostMemoryUnregister(void *mem) override { return true; }
74 
75   bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src,
76               uint64 size) override;
77   bool Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, const void *host_src,
78               uint64 size) override;
79   bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *gpu_dst,
80                             const DeviceMemoryBase &gpu_src,
81                             uint64 size) override;
82 
83   port::Status MemZero(Stream *stream, DeviceMemoryBase *location,
84                        uint64 size) override;
85   port::Status Memset(Stream *stream, DeviceMemoryBase *location, uint8 pattern,
86                       uint64 size) override;
87   port::Status Memset32(Stream *stream, DeviceMemoryBase *location,
88                         uint32 pattern, uint64 size) override;
89 
90   // No "synchronize all activity" implemented for this platform at the moment.
SynchronizeAllActivity()91   bool SynchronizeAllActivity() override { return true; }
92   port::Status SynchronousMemZero(DeviceMemoryBase *location,
93                                   uint64 size) override;
94 
95   port::Status SynchronousMemSet(DeviceMemoryBase *location, int value,
96                                  uint64 size) override;
97 
98   port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
99                                  const void *host_src, uint64 size) override;
100   port::Status SynchronousMemcpy(void *host_dst,
101                                  const DeviceMemoryBase &gpu_src,
102                                  uint64 size) override;
103   port::Status SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst,
104                                                const DeviceMemoryBase &gpu_src,
105                                                uint64 size) override;
106 
107   bool HostCallback(Stream *stream,
108                     std::function<port::Status()> callback) override;
109 
110   port::Status AllocateEvent(Event *event) override;
111   port::Status DeallocateEvent(Event *event) override;
112   port::Status RecordEvent(Stream *stream, Event *event) override;
113   port::Status WaitForEvent(Stream *stream, Event *event) override;
114   Event::Status PollForEventStatus(Event *event) override;
115 
116   bool AllocateStream(Stream *stream) override;
117   void DeallocateStream(Stream *stream) override;
118   bool CreateStreamDependency(Stream *dependent, Stream *other) override;
119 
120   // No special initialization is necessary for host timers.
AllocateTimer(Timer * timer)121   bool AllocateTimer(Timer *timer) override { return true; }
122 
DeallocateTimer(Timer * timer)123   void DeallocateTimer(Timer *timer) override {}
124 
125   bool StartTimer(Stream *stream, Timer *timer) override;
126 
127   bool StopTimer(Stream *stream, Timer *timer) override;
128 
129   port::Status BlockHostUntilDone(Stream *stream) override;
130 
PlatformDeviceCount()131   int PlatformDeviceCount() override { return 1; }
132 
133   bool DeviceMemoryUsage(int64 *free, int64 *total) const override;
134 
CreateDeviceDescription()135   port::StatusOr<std::unique_ptr<DeviceDescription>> CreateDeviceDescription()
136       const override {
137     return CreateDeviceDescription(0);
138   }
139 
140   static port::StatusOr<std::unique_ptr<DeviceDescription>>
141   CreateDeviceDescription(int device_ordinal);
142 
EnablePeerAccessTo(StreamExecutorInterface * other)143   port::Status EnablePeerAccessTo(StreamExecutorInterface *other) override {
144     return port::Status::OK();
145   }
146 
CanEnablePeerAccessTo(StreamExecutorInterface * other)147   bool CanEnablePeerAccessTo(StreamExecutorInterface *other) override {
148     return true;
149   }
150 
151   bool SupportsBlas() const override;
152   blas::BlasSupport *CreateBlas() override;
153 
SupportsDnn()154   bool SupportsDnn() const override { return false; }
CreateDnn()155   dnn::DnnSupport *CreateDnn() override { return nullptr; }
156 
157   bool SupportsFft() const override;
158   fft::FftSupport *CreateFft() override;
159 
160   bool SupportsRng() const override;
161   rng::RngSupport *CreateRng() override;
162 
163   std::unique_ptr<internal::EventInterface> CreateEventImplementation()
164       override;
165 
CreateKernelImplementation()166   std::unique_ptr<internal::KernelInterface> CreateKernelImplementation()
167       override {
168     return nullptr;
169   }
170 
171   std::unique_ptr<internal::StreamInterface> GetStreamImplementation() override;
172 
GetTimerImplementation()173   std::unique_ptr<internal::TimerInterface> GetTimerImplementation() override {
174     return std::unique_ptr<internal::TimerInterface>(new HostTimer());
175   }
176 
GpuContextHack()177   void *GpuContextHack() override { return nullptr; }
178 
179  private:
180   const PluginConfig plugin_config_;
181   // Size of thread stacks for streams in bytes. '0' means "the default size".
182   size_t thread_stack_size_in_bytes_ = 0;
183 };
184 
185 }  // namespace host
186 }  // namespace stream_executor
187 
188 #endif  // TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_
189