1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
#include "tensorflow/stream_executor/kernel_spec.h"

#include <cstring>

#include "absl/strings/string_view.h"
18
19 namespace stream_executor {
20
KernelLoaderSpec(absl::string_view kernelname)21 KernelLoaderSpec::KernelLoaderSpec(absl::string_view kernelname)
22 : kernelname_(string(kernelname)) {}
23
OnDiskKernelLoaderSpec(absl::string_view filename,absl::string_view kernelname)24 OnDiskKernelLoaderSpec::OnDiskKernelLoaderSpec(absl::string_view filename,
25 absl::string_view kernelname)
26 : KernelLoaderSpec(kernelname), filename_(string(filename)) {}
27
CudaPtxOnDisk(absl::string_view filename,absl::string_view kernelname)28 CudaPtxOnDisk::CudaPtxOnDisk(absl::string_view filename,
29 absl::string_view kernelname)
30 : OnDiskKernelLoaderSpec(filename, kernelname) {}
31
CudaCubinOnDisk(absl::string_view filename,absl::string_view kernelname)32 CudaCubinOnDisk::CudaCubinOnDisk(absl::string_view filename,
33 absl::string_view kernelname)
34 : OnDiskKernelLoaderSpec(filename, kernelname) {}
35
CudaCubinInMemory(const char * bytes,absl::string_view kernelname)36 CudaCubinInMemory::CudaCubinInMemory(const char *bytes,
37 absl::string_view kernelname)
38 : KernelLoaderSpec(kernelname), bytes_(bytes) {}
39
// Strict-weak "less than" ordering for (major, minor) compute capability
// pairs: the first elements are compared first, and the second elements break
// ties. Returns true iff `lhs` orders strictly before `rhs`.
//
// std::tuple's operator< already performs exactly this lexicographic
// comparison, so it is used instead of spelling the comparison out by hand.
bool CompareComputeCapability(const std::tuple<int, int> &lhs,
                              const std::tuple<int, int> &rhs) {
  return lhs < rhs;
}
46
47 const std::tuple<int, int> CudaPtxInMemory::kMinimumCapability{1, 0};
48
CudaPtxInMemory(absl::string_view ptx,absl::string_view kernel_name,bool ptx_compressed)49 CudaPtxInMemory::CudaPtxInMemory(absl::string_view ptx,
50 absl::string_view kernel_name,
51 bool ptx_compressed)
52 : KernelLoaderSpec(kernel_name),
53 ptx_by_compute_capability_(CompareComputeCapability) {
54 if (ptx_compressed) {
55 // Lazy decompression. Put an empty string in decompressed_ptx_ showing that
56 // the original ptx is compressed.
57 decompressed_ptx_[ptx.data()] = "";
58 }
59 ptx_by_compute_capability_[kMinimumCapability] = ptx.data();
60 }
61
CudaPtxInMemory(const std::initializer_list<CudaPtxInMemory::PtxSpec> & spec_list,absl::string_view kernel_name,bool ptx_compressed)62 CudaPtxInMemory::CudaPtxInMemory(
63 const std::initializer_list<CudaPtxInMemory::PtxSpec> &spec_list,
64 absl::string_view kernel_name, bool ptx_compressed)
65 : KernelLoaderSpec(kernel_name),
66 ptx_by_compute_capability_(CompareComputeCapability) {
67 for (const auto &spec : spec_list) {
68 int major, minor;
69 absl::string_view ptx;
70 std::tie(major, minor, ptx) = spec;
71 if (ptx_compressed) {
72 // Lazy decompression. Put an empty string in decompressed_ptx_ showing
73 // that the original ptx is compressed.
74 decompressed_ptx_[ptx.data()] = "";
75 }
76 ptx_by_compute_capability_[std::tuple<int, int>{major, minor}] = ptx.data();
77 }
78 }
79
DecompressPtx(const char * ptx)80 string CudaPtxInMemory::DecompressPtx(const char *ptx) {
81 // Get the length of the PTX string from the beginning of the buffer.
82 uint64 ptx_length = *reinterpret_cast<const uint64 *>(ptx);
83 // Get the PTX string from the buffer with offset and length.
84 string compressed_ptx(ptx + sizeof(uint64),
85 ptx + sizeof(uint64) + ptx_length);
86 string decompressed_ptx;
87 // Decompress the PTX string with bzip2.
88 LOG(FATAL) << "bzip2 decompression is not supported yet.";
89 return decompressed_ptx;
90 }
91
default_text() const92 const char *CudaPtxInMemory::default_text() const {
93 if (ptx_by_compute_capability_.empty()) {
94 return nullptr;
95 }
96
97 mutex_lock lock(mu_);
98
99 auto ptx = ptx_by_compute_capability_.begin()->second;
100 // Check if there is an entry in decompressed ptx table.
101 auto decompressed_ptx_iter = decompressed_ptx_.find(ptx);
102 if (decompressed_ptx_iter != decompressed_ptx_.end()) {
103 // If the decompressed string is empty, which means the ptx hasn't been
104 // decompressed, decompress it here.
105 if (decompressed_ptx_iter->second.empty()) {
106 decompressed_ptx_iter->second = DecompressPtx(ptx);
107 }
108 return decompressed_ptx_iter->second.c_str();
109 }
110 return ptx;
111 }
112
original_default_text() const113 const char *CudaPtxInMemory::original_default_text() const {
114 if (ptx_by_compute_capability_.empty()) {
115 return nullptr;
116 }
117
118 return ptx_by_compute_capability_.begin()->second;
119 }
120
text(int compute_capability_major,int compute_capability_minor) const121 const char *CudaPtxInMemory::text(int compute_capability_major,
122 int compute_capability_minor) const {
123 std::tuple<int, int> capability{compute_capability_major,
124 compute_capability_minor};
125
126 auto ptx_iter = ptx_by_compute_capability_.find(capability);
127 if (ptx_iter == ptx_by_compute_capability_.end()) {
128 return nullptr;
129 }
130
131 mutex_lock lock(mu_);
132
133 // Check if there is an entry in decompressed ptx table.
134 auto decompressed_ptx_iter = decompressed_ptx_.find(ptx_iter->second);
135 if (decompressed_ptx_iter != decompressed_ptx_.end()) {
136 // If the decompressed string is empty, which means the ptx hasn't been
137 // decompressed, decompress it here.
138 if (decompressed_ptx_iter->second.empty()) {
139 decompressed_ptx_iter->second = DecompressPtx(ptx_iter->second);
140 }
141 return decompressed_ptx_iter->second.c_str();
142 }
143 return ptx_iter->second;
144 }
145
original_text(int compute_capability_major,int compute_capability_minor) const146 const char *CudaPtxInMemory::original_text(int compute_capability_major,
147 int compute_capability_minor) const {
148 std::tuple<int, int> capability{compute_capability_major,
149 compute_capability_minor};
150
151 auto ptx_iter = ptx_by_compute_capability_.find(capability);
152 if (ptx_iter == ptx_by_compute_capability_.end()) {
153 return nullptr;
154 }
155
156 return ptx_iter->second;
157 }
158
OpenCLTextOnDisk(absl::string_view filename,absl::string_view kernelname)159 OpenCLTextOnDisk::OpenCLTextOnDisk(absl::string_view filename,
160 absl::string_view kernelname)
161 : OnDiskKernelLoaderSpec(filename, kernelname) {}
162
OpenCLTextInMemory(absl::string_view text,absl::string_view kernelname)163 OpenCLTextInMemory::OpenCLTextInMemory(absl::string_view text,
164 absl::string_view kernelname)
165 : KernelLoaderSpec(kernelname), text_(text) {}
166
OpenCLBinaryOnDisk(absl::string_view filename,absl::string_view kernelname)167 OpenCLBinaryOnDisk::OpenCLBinaryOnDisk(absl::string_view filename,
168 absl::string_view kernelname)
169 : OnDiskKernelLoaderSpec(filename, kernelname) {}
170
AddOpenCLTextOnDisk(absl::string_view filename,absl::string_view kernelname)171 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextOnDisk(
172 absl::string_view filename, absl::string_view kernelname) {
173 CHECK(ocl_text_on_disk_ == nullptr);
174 ocl_text_on_disk_.reset(new OpenCLTextOnDisk{filename, kernelname});
175 return this;
176 }
177
AddOpenCLBinaryOnDisk(absl::string_view filename,absl::string_view kernelname)178 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLBinaryOnDisk(
179 absl::string_view filename, absl::string_view kernelname) {
180 CHECK(ocl_binary_on_disk_ == nullptr);
181 ocl_binary_on_disk_.reset(new OpenCLBinaryOnDisk{filename, kernelname});
182 return this;
183 }
184
AddOpenCLTextInMemory(absl::string_view filename,absl::string_view kernelname)185 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextInMemory(
186 absl::string_view filename, absl::string_view kernelname) {
187 CHECK(ocl_text_in_memory_ == nullptr);
188 ocl_text_in_memory_.reset(new OpenCLTextInMemory{filename, kernelname});
189 return this;
190 }
191
AddCudaPtxOnDisk(absl::string_view filename,absl::string_view kernelname)192 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxOnDisk(
193 absl::string_view filename, absl::string_view kernelname) {
194 CHECK(cuda_ptx_on_disk_ == nullptr);
195 cuda_ptx_on_disk_.reset(new CudaPtxOnDisk{filename, kernelname});
196 return this;
197 }
198
AddCudaCubinInMemory(const char * bytes,absl::string_view kernelname)199 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinInMemory(
200 const char *bytes, absl::string_view kernelname) {
201 CHECK(cuda_cubin_in_memory_ == nullptr);
202 cuda_cubin_in_memory_.reset(new CudaCubinInMemory{bytes, kernelname});
203 return this;
204 }
205
AddCudaCubinOnDisk(absl::string_view filename,absl::string_view kernelname)206 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinOnDisk(
207 absl::string_view filename, absl::string_view kernelname) {
208 CHECK(cuda_cubin_on_disk_ == nullptr);
209 cuda_cubin_on_disk_.reset(new CudaCubinOnDisk{filename, kernelname});
210 return this;
211 }
212
AddCudaPtxInMemory(absl::string_view ptx,absl::string_view kernelname)213 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
214 absl::string_view ptx, absl::string_view kernelname) {
215 CHECK(cuda_ptx_in_memory_ == nullptr);
216 cuda_ptx_in_memory_.reset(
217 new CudaPtxInMemory{ptx, kernelname, false /* ptx_compressed */});
218 return this;
219 }
220
AddCudaCompressedPtxInMemory(absl::string_view ptx,absl::string_view kernelname)221 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
222 absl::string_view ptx, absl::string_view kernelname) {
223 CHECK(cuda_ptx_in_memory_ == nullptr);
224 cuda_ptx_in_memory_.reset(
225 new CudaPtxInMemory{ptx, kernelname, true /* ptx_compressed */});
226 return this;
227 }
228
AddCudaPtxInMemory(std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,absl::string_view kernelname)229 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
230 std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
231 absl::string_view kernelname) {
232 CHECK(cuda_ptx_in_memory_ == nullptr);
233 cuda_ptx_in_memory_.reset(
234 new CudaPtxInMemory{spec_list, kernelname, false /* ptx_compressed */});
235 return this;
236 }
237
AddCudaCompressedPtxInMemory(std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,absl::string_view kernelname)238 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
239 std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
240 absl::string_view kernelname) {
241 CHECK(cuda_ptx_in_memory_ == nullptr);
242 cuda_ptx_in_memory_.reset(
243 new CudaPtxInMemory{spec_list, kernelname, true /* ptx_compressed */});
244 return this;
245 }
246
MultiKernelLoaderSpec(size_t arity)247 MultiKernelLoaderSpec::MultiKernelLoaderSpec(size_t arity) : arity_(arity) {}
248
249 } // namespace stream_executor
250