• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_STREAM_EXECUTOR_GPU_ASM_COMPILER_H_
17 #define TENSORFLOW_STREAM_EXECUTOR_GPU_ASM_COMPILER_H_
18 
19 #include <string>
20 
21 #include "absl/types/span.h"
22 #include "tensorflow/stream_executor/lib/statusor.h"
23 #include "tensorflow/stream_executor/platform/port.h"
24 
25 namespace stream_executor {
26 // Compilation options for compiling ptxas.
27 struct GpuAsmOpts {
28   // Disable Cuda ptxas optimizations.
29   bool disable_gpuasm_optimizations;
30 
31   // Cuda directory which would be searched first.
32   std::string preferred_cuda_dir;
33 
34   explicit GpuAsmOpts(bool disable_gpuasm_optimizations = false,
35                       absl::string_view preferred_cuda_dir = "")
disable_gpuasm_optimizationsGpuAsmOpts36       : disable_gpuasm_optimizations(disable_gpuasm_optimizations),
37         preferred_cuda_dir(preferred_cuda_dir) {}
38 
39   using PtxOptionsTuple = std::tuple<bool, std::string>;
40 
ToTupleGpuAsmOpts41   PtxOptionsTuple ToTuple() {
42     return std::make_tuple(disable_gpuasm_optimizations, preferred_cuda_dir);
43   }
44 };
45 
46 // Compiles the given PTX string using ptxas and returns the resulting machine
47 // code (i.e. a cubin) as a byte array.
48 //
49 // compile_ptx_options is used to query for the CUDA location in case it is
50 // customized in a passed flag, and for controlling ptxas optimizations.
51 port::StatusOr<std::vector<uint8>> CompileGpuAsm(int device_ordinal,
52                                                  const char* ptx_contents,
53                                                  GpuAsmOpts options);
54 
55 // Same as CompileGpuAsm, but caches the result, and returns unowned view of
56 // the compiled binary.
57 //
58 // A copy of the string provided in ptx will be made.
59 port::StatusOr<absl::Span<const uint8>> CompileGpuAsmOrGetCached(
60     int device_ordinal, const char* ptx, GpuAsmOpts compilation_options);
61 
62 }  // namespace stream_executor
63 
64 #endif  // TENSORFLOW_STREAM_EXECUTOR_GPU_ASM_COMPILER_H_
65