1 //
2 // Copyright 2012-2016 Francisco Jerez
3 // Copyright 2012-2016 Advanced Micro Devices, Inc.
4 // Copyright 2015 Zoltan Gilian
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining a
7 // copy of this software and associated documentation files (the "Software"),
8 // to deal in the Software without restriction, including without limitation
9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 // and/or sell copies of the Software, and to permit persons to whom the
11 // Software is furnished to do so, subject to the following conditions:
12 //
13 // The above copyright notice and this permission notice shall be included in
14 // all copies or substantial portions of the Software.
15 //
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 // OTHER DEALINGS IN THE SOFTWARE.
23 //
24
25 ///
26 /// \file
27 /// Codegen back-end-independent part of the construction of an executable
28 /// clover::binary, including kernel argument metadata extraction and
29 /// formatting of the pre-generated binary code in a form that can be
30 /// understood by pipe drivers.
31 ///
32
33 #include <llvm/Support/Allocator.h>
34
35 #include "llvm/codegen.hpp"
36 #include "llvm/metadata.hpp"
37
38 #include "CL/cl.h"
39
40 #include "pipe/p_state.h"
41 #include "util/u_math.h"
42
43 #include <clang/Basic/TargetInfo.h>
44
45 using clover::binary;
46 using clover::detokenize;
47 using namespace clover::llvm;
48
49 using ::llvm::Module;
50 using ::llvm::Function;
51 using ::llvm::Type;
52 using ::llvm::isa;
53 using ::llvm::cast;
54 using ::llvm::dyn_cast;
55
56 namespace {
57 enum binary::argument::type
get_image_type(const std::string & type,const std::string & qual)58 get_image_type(const std::string &type,
59 const std::string &qual) {
60 if (type == "image1d_t" || type == "image2d_t" || type == "image3d_t") {
61 if (qual == "read_only")
62 return binary::argument::image_rd;
63 else if (qual == "write_only")
64 return binary::argument::image_wr;
65 }
66
67 unreachable("Unsupported image type");
68 }
69
create_arg_info(const std::string & arg_name,const std::string & type_name,const std::string & type_qualifier,const uint64_t address_qualifier,const std::string & access_qualifier)70 binary::arg_info create_arg_info(const std::string &arg_name,
71 const std::string &type_name,
72 const std::string &type_qualifier,
73 const uint64_t address_qualifier,
74 const std::string &access_qualifier) {
75
76 cl_kernel_arg_type_qualifier cl_type_qualifier =
77 CL_KERNEL_ARG_TYPE_NONE;
78 if (type_qualifier.find("const") != std::string::npos)
79 cl_type_qualifier |= CL_KERNEL_ARG_TYPE_CONST;
80 if (type_qualifier.find("restrict") != std::string::npos)
81 cl_type_qualifier |= CL_KERNEL_ARG_TYPE_RESTRICT;
82 if (type_qualifier.find("volatile") != std::string::npos)
83 cl_type_qualifier |= CL_KERNEL_ARG_TYPE_VOLATILE;
84
85 cl_kernel_arg_address_qualifier cl_address_qualifier =
86 CL_KERNEL_ARG_ADDRESS_PRIVATE;
87 if (address_qualifier == 1)
88 cl_address_qualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL;
89 else if (address_qualifier == 2)
90 cl_address_qualifier = CL_KERNEL_ARG_ADDRESS_CONSTANT;
91 else if (address_qualifier == 3)
92 cl_address_qualifier = CL_KERNEL_ARG_ADDRESS_LOCAL;
93
94 cl_kernel_arg_access_qualifier cl_access_qualifier =
95 CL_KERNEL_ARG_ACCESS_NONE;
96 if (access_qualifier == "read_only")
97 cl_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_ONLY;
98 else if (access_qualifier == "write_only")
99 cl_access_qualifier = CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
100 else if (access_qualifier == "read_write")
101 cl_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_WRITE;
102
103 return binary::arg_info(arg_name, type_name, cl_type_qualifier,
104 cl_address_qualifier, cl_access_qualifier);
105 }
106
107 std::vector<size_t>
get_reqd_work_group_size(const Module & mod,const std::string & kernel_name)108 get_reqd_work_group_size(const Module &mod,
109 const std::string &kernel_name) {
110 const Function &f = *mod.getFunction(kernel_name);
111 auto vector_metadata = get_uint_vector_kernel_metadata(f, "reqd_work_group_size");
112
113 return vector_metadata.empty() ? std::vector<size_t>({0, 0, 0}) : vector_metadata;
114 }
115
116
117 std::string
kernel_attributes(const Module & mod,const std::string & kernel_name)118 kernel_attributes(const Module &mod, const std::string &kernel_name) {
119 std::vector<std::string> attributes;
120
121 const Function &f = *mod.getFunction(kernel_name);
122
123 auto vec_type_hint = get_type_kernel_metadata(f, "vec_type_hint");
124 if (!vec_type_hint.empty())
125 attributes.emplace_back("vec_type_hint(" + vec_type_hint + ")");
126
127 auto work_group_size_hint = get_uint_vector_kernel_metadata(f, "work_group_size_hint");
128 if (!work_group_size_hint.empty()) {
129 std::string s = "work_group_size_hint(";
130 s += detokenize(work_group_size_hint, ",");
131 s += ")";
132 attributes.emplace_back(s);
133 }
134
135 auto reqd_work_group_size = get_uint_vector_kernel_metadata(f, "reqd_work_group_size");
136 if (!reqd_work_group_size.empty()) {
137 std::string s = "reqd_work_group_size(";
138 s += detokenize(reqd_work_group_size, ",");
139 s += ")";
140 attributes.emplace_back(s);
141 }
142
143 auto nosvm = get_str_kernel_metadata(f, "nosvm");
144 if (!nosvm.empty())
145 attributes.emplace_back("nosvm");
146
147 return detokenize(attributes, " ");
148 }
149
150 std::vector<binary::argument>
make_kernel_args(const Module & mod,const std::string & kernel_name,const clang::CompilerInstance & c)151 make_kernel_args(const Module &mod, const std::string &kernel_name,
152 const clang::CompilerInstance &c) {
153 std::vector<binary::argument> args;
154 const Function &f = *mod.getFunction(kernel_name);
155 ::llvm::DataLayout dl(&mod);
156 const auto size_type =
157 dl.getSmallestLegalIntType(mod.getContext(), sizeof(cl_uint) * 8);
158
159 for (const auto &arg : f.args()) {
160 const auto arg_type = arg.getType();
161
162 // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data
163 // type that is not a power of two bytes in size must be
164 // aligned to the next larger power of two.
165 // This rule applies to built-in types only, not structs or unions."
166 const unsigned arg_api_size = dl.getTypeAllocSize(arg_type);
167
168 const unsigned target_size = dl.getTypeStoreSize(arg_type);
169 const unsigned target_align = dl.getABITypeAlignment(arg_type);
170
171 const auto type_name = get_str_argument_metadata(f, arg,
172 "kernel_arg_type");
173 if (type_name == "image2d_t" || type_name == "image3d_t") {
174 // Image.
175 const auto access_qual = get_str_argument_metadata(
176 f, arg, "kernel_arg_access_qual");
177 args.emplace_back(get_image_type(type_name, access_qual),
178 target_size, target_size,
179 target_align, binary::argument::zero_ext);
180
181 } else if (type_name == "sampler_t") {
182 args.emplace_back(binary::argument::sampler, arg_api_size,
183 target_size, target_align,
184 binary::argument::zero_ext);
185
186 } else if (type_name == "__llvm_image_size") {
187 // Image size implicit argument.
188 args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
189 dl.getTypeStoreSize(size_type),
190 dl.getABITypeAlignment(size_type),
191 binary::argument::zero_ext,
192 binary::argument::image_size);
193
194 } else if (type_name == "__llvm_image_format") {
195 // Image format implicit argument.
196 args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
197 dl.getTypeStoreSize(size_type),
198 dl.getABITypeAlignment(size_type),
199 binary::argument::zero_ext,
200 binary::argument::image_format);
201
202 } else {
203 // Other types.
204 const auto actual_type =
205 isa< ::llvm::PointerType>(arg_type) && arg.hasByValAttr() ?
206 cast< ::llvm::PointerType>(arg_type)->getElementType() : arg_type;
207
208 if (actual_type->isPointerTy()) {
209 const unsigned address_space =
210 cast< ::llvm::PointerType>(actual_type)->getAddressSpace();
211
212 const auto &map = c.getTarget().getAddressSpaceMap();
213 const auto offset =
214 static_cast<unsigned>(clang::LangAS::opencl_local);
215 if (address_space == map[offset]) {
216 const auto pointee_type = cast<
217 ::llvm::PointerType>(actual_type)->getElementType();
218 args.emplace_back(binary::argument::local, arg_api_size,
219 target_size,
220 dl.getABITypeAlignment(pointee_type),
221 binary::argument::zero_ext);
222 } else {
223 // XXX: Correctly handle constant address space. There is no
224 // way for r600g to pass a handle for constant buffers back
225 // to clover like it can for global buffers, so
226 // creating constant arguments will break r600g. For now,
227 // continue treating constant buffers as global buffers
228 // until we can come up with a way to create handles for
229 // constant buffers.
230 args.emplace_back(binary::argument::global, arg_api_size,
231 target_size, target_align,
232 binary::argument::zero_ext);
233 }
234
235 } else {
236 const bool needs_sign_ext = f.getAttributes().hasParamAttr(
237 arg.getArgNo(), ::llvm::Attribute::SExt);
238
239 args.emplace_back(binary::argument::scalar, arg_api_size,
240 target_size, target_align,
241 (needs_sign_ext ? binary::argument::sign_ext :
242 binary::argument::zero_ext));
243 }
244
245 // Add kernel argument infos if built with -cl-kernel-arg-info.
246 if (c.getCodeGenOpts().EmitOpenCLArgMetadata) {
247 args.back().info = create_arg_info(
248 get_str_argument_metadata(f, arg, "kernel_arg_name"),
249 type_name,
250 get_str_argument_metadata(f, arg, "kernel_arg_type_qual"),
251 get_uint_argument_metadata(f, arg, "kernel_arg_addr_space"),
252 get_str_argument_metadata(f, arg, "kernel_arg_access_qual"));
253 }
254 }
255 }
256
257 // Append implicit arguments. XXX - The types, ordering and
258 // vector size of the implicit arguments should depend on the
259 // target according to the selected calling convention.
260 args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
261 dl.getTypeStoreSize(size_type),
262 dl.getABITypeAlignment(size_type),
263 binary::argument::zero_ext,
264 binary::argument::grid_dimension);
265
266 args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
267 dl.getTypeStoreSize(size_type),
268 dl.getABITypeAlignment(size_type),
269 binary::argument::zero_ext,
270 binary::argument::grid_offset);
271
272 return args;
273 }
274
275 binary::section
make_text_section(const std::vector<char> & code)276 make_text_section(const std::vector<char> &code) {
277 const pipe_binary_program_header header { uint32_t(code.size()) };
278 binary::section text { 0, binary::section::text_executable,
279 header.num_bytes, {} };
280
281 text.data.insert(text.data.end(), reinterpret_cast<const char *>(&header),
282 reinterpret_cast<const char *>(&header) + sizeof(header));
283 text.data.insert(text.data.end(), code.begin(), code.end());
284
285 return text;
286 }
287 }
288
289 binary
build_module_common(const Module & mod,const std::vector<char> & code,const std::map<std::string,unsigned> & offsets,const clang::CompilerInstance & c)290 clover::llvm::build_module_common(const Module &mod,
291 const std::vector<char> &code,
292 const std::map<std::string,
293 unsigned> &offsets,
294 const clang::CompilerInstance &c) {
295 binary b;
296
297 for (const auto &llvm_name : map(std::mem_fn(&Function::getName),
298 get_kernels(mod))) {
299 const ::std::string name(llvm_name);
300 if (offsets.count(name))
301 b.syms.emplace_back(name, kernel_attributes(mod, name),
302 get_reqd_work_group_size(mod, name),
303 0, offsets.at(name),
304 make_kernel_args(mod, name, c));
305 }
306
307 b.secs.push_back(make_text_section(code));
308 return b;
309 }
310