/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Declares the HostExecutor class, which is a CPU-only implementation of
// the StreamExecutor interface. For now, this is used for testing and to
// examine the performance of host-based StreamExecutor code.
#ifndef TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_
#define TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_

#include "tensorflow/stream_executor/blas.h"
#include "tensorflow/stream_executor/host/host_stream.h"
#include "tensorflow/stream_executor/host/host_timer.h"
#include "tensorflow/stream_executor/lib/error.h"
#include "tensorflow/stream_executor/lib/status.h"
#include "tensorflow/stream_executor/rng.h"
#include "tensorflow/stream_executor/stream_executor.h"
#include "tensorflow/stream_executor/stream_executor_internal.h"

namespace stream_executor {
namespace host {

// An implementation of StreamExecutor that does no communication or
// interaction with a device, but DOES perform memory operations backed by the
// host. Plugin routines (RNG, BLAS) are also supported and functional. Kernel
// invocations will fail, but host callbacks may be enqueued on this executor
// and its associated stream, and should follow standard ordering semantics.
//
// This is useful for evaluating the performance of host-based or fallback
// routines executed under the context of a GPU executor.
// See stream_executor.h for description of the below operations.
class HostExecutor : public internal::StreamExecutorInterface {
 public:
  explicit HostExecutor(const PluginConfig &plugin_config);
  ~HostExecutor() override;

  // The stack size used for host streams can be set via
  // device_options.non_portable_tags["host_stack_size"].
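  //
  // A minimal, illustrative sketch of passing that tag at initialization time
  // (the `executor` pointer, device ordinal, and chosen size below are
  // hypothetical; the tag value is the stack size in bytes, as a string):
  //
  //   DeviceOptions options = DeviceOptions::Default();
  //   options.non_portable_tags["host_stack_size"] = "1048576";
  //   port::Status init_status =
  //       executor->Init(/*device_ordinal=*/0, options);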
  port::Status Init(int device_ordinal, DeviceOptions device_options) override;

  port::Status GetKernel(const MultiKernelLoaderSpec &spec,
                         KernelBase *kernel) override {
    return port::UnimplementedError("Not Implemented");
  }
  port::Status Launch(Stream *stream, const ThreadDim &thread_dims,
                      const BlockDim &block_dims, const KernelBase &kernel,
                      const KernelArgsArrayBase &args) override {
    return port::UnimplementedError("Not Implemented");
  }

  DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override;
  void *GetSubBuffer(DeviceMemoryBase *parent, uint64 offset_bytes,
                     uint64 size_bytes) override;
  void Deallocate(DeviceMemoryBase *mem) override;

  void *HostMemoryAllocate(uint64 size) override { return new char[size]; }
  void HostMemoryDeallocate(void *mem) override {
    delete[] static_cast<char *>(mem);
  }
  bool HostMemoryRegister(void *mem, uint64 size) override { return true; }
  bool HostMemoryUnregister(void *mem) override { return true; }

  bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src,
              uint64 size) override;
  bool Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, const void *host_src,
              uint64 size) override;
  bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *gpu_dst,
                            const DeviceMemoryBase &gpu_src,
                            uint64 size) override;

  port::Status MemZero(Stream *stream, DeviceMemoryBase *location,
                       uint64 size) override;
  port::Status Memset(Stream *stream, DeviceMemoryBase *location,
                      uint8 pattern, uint64 size) override;
  port::Status Memset32(Stream *stream, DeviceMemoryBase *location,
                        uint32 pattern, uint64 size) override;

  // No "synchronize all activity" implemented for this platform at the moment.
  bool SynchronizeAllActivity() override { return true; }
  port::Status SynchronousMemZero(DeviceMemoryBase *location,
                                  uint64 size) override;

  port::Status SynchronousMemSet(DeviceMemoryBase *location, int value,
                                 uint64 size) override;

  port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
                                 const void *host_src, uint64 size) override;
  port::Status SynchronousMemcpy(void *host_dst,
                                 const DeviceMemoryBase &gpu_src,
                                 uint64 size) override;
  port::Status SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst,
                                               const DeviceMemoryBase &gpu_src,
                                               uint64 size) override;

  bool HostCallback(Stream *stream,
                    std::function<port::Status()> callback) override;

  port::Status AllocateEvent(Event *event) override;
  port::Status DeallocateEvent(Event *event) override;
  port::Status RecordEvent(Stream *stream, Event *event) override;
  port::Status WaitForEvent(Stream *stream, Event *event) override;
  Event::Status PollForEventStatus(Event *event) override;

  bool AllocateStream(Stream *stream) override;
  void DeallocateStream(Stream *stream) override;
  bool CreateStreamDependency(Stream *dependent, Stream *other) override;
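  // As noted in the class comment, kernel launches are unimplemented on this
  // executor, but host callbacks are ordered like any other stream operation.
  // A minimal usage sketch via the public Stream API (the `executor` below is
  // a hypothetical StreamExecutor* obtained for a host device):
  //
  //   Stream stream(executor);
  //   stream.Init();
  //   stream.ThenDoHostCallbackWithStatus(
  //       [] { return port::Status::OK(); });  // runs in enqueue order
  //   port::Status status = stream.BlockHostUntilDone();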
  // No special initialization is necessary for host timers.
  bool AllocateTimer(Timer *timer) override { return true; }

  void DeallocateTimer(Timer *timer) override {}

  bool StartTimer(Stream *stream, Timer *timer) override;

  bool StopTimer(Stream *stream, Timer *timer) override;

  port::Status BlockHostUntilDone(Stream *stream) override;

  int PlatformDeviceCount() override { return 1; }

  bool DeviceMemoryUsage(int64 *free, int64 *total) const override;

  port::StatusOr<std::unique_ptr<DeviceDescription>> CreateDeviceDescription()
      const override {
    return CreateDeviceDescription(0);
  }

  static port::StatusOr<std::unique_ptr<DeviceDescription>>
  CreateDeviceDescription(int device_ordinal);

  port::Status EnablePeerAccessTo(StreamExecutorInterface *other) override {
    return port::Status::OK();
  }

  bool CanEnablePeerAccessTo(StreamExecutorInterface *other) override {
    return true;
  }

  bool SupportsBlas() const override;
  blas::BlasSupport *CreateBlas() override;

  bool SupportsDnn() const override { return false; }
  dnn::DnnSupport *CreateDnn() override { return nullptr; }

  bool SupportsFft() const override;
  fft::FftSupport *CreateFft() override;

  bool SupportsRng() const override;
  rng::RngSupport *CreateRng() override;

  std::unique_ptr<internal::EventInterface> CreateEventImplementation()
      override;

  std::unique_ptr<internal::KernelInterface> CreateKernelImplementation()
      override {
    return nullptr;
  }

  std::unique_ptr<internal::StreamInterface> GetStreamImplementation() override;

  std::unique_ptr<internal::TimerInterface> GetTimerImplementation() override {
    return std::unique_ptr<internal::TimerInterface>(new HostTimer());
  }

  void *GpuContextHack() override { return nullptr; }

 private:
  const PluginConfig plugin_config_;
  // Size of thread stacks for streams in bytes. '0' means "the default size".
  size_t thread_stack_size_in_bytes_ = 0;
};

}  // namespace host
}  // namespace stream_executor

#endif  // TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_