//
// Copyright 2012-2016 Francisco Jerez
// Copyright 2012-2016 Advanced Micro Devices, Inc.
// Copyright 2015 Zoltan Gilian
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

///
/// \file
/// Codegen back-end-independent part of the construction of an executable
/// clover::module, including kernel argument metadata extraction and
/// formatting of the pre-generated binary code in a form that can be
/// understood by pipe drivers.
31 /// 32 33 #include "llvm/codegen.hpp" 34 #include "llvm/metadata.hpp" 35 36 #include "CL/cl.h" 37 38 #include "pipe/p_state.h" 39 #include "util/u_math.h" 40 41 #include <clang/Basic/TargetInfo.h> 42 43 using namespace clover; 44 using namespace clover::llvm; 45 46 using ::llvm::Module; 47 using ::llvm::Function; 48 using ::llvm::Type; 49 using ::llvm::isa; 50 using ::llvm::cast; 51 using ::llvm::dyn_cast; 52 53 namespace { 54 enum module::argument::type get_image_type(const std::string & type,const std::string & qual)55 get_image_type(const std::string &type, 56 const std::string &qual) { 57 if (type == "image2d_t" && qual == "read_only") 58 return module::argument::image2d_rd; 59 else if (type == "image2d_t" && qual == "write_only") 60 return module::argument::image2d_wr; 61 else if (type == "image3d_t" && qual == "read_only") 62 return module::argument::image3d_rd; 63 else if (type == "image3d_t" && qual == "write_only") 64 return module::argument::image3d_wr; 65 else 66 unreachable("Unknown image type"); 67 } 68 create_arg_info(const std::string & arg_name,const std::string & type_name,const std::string & type_qualifier,const int address_qualifier,const std::string & access_qualifier)69 module::arg_info create_arg_info(const std::string &arg_name, 70 const std::string &type_name, 71 const std::string &type_qualifier, 72 const int address_qualifier, 73 const std::string &access_qualifier) { 74 75 cl_kernel_arg_type_qualifier cl_type_qualifier = 76 CL_KERNEL_ARG_TYPE_NONE; 77 if (type_qualifier.find("const") != std::string::npos) 78 cl_type_qualifier |= CL_KERNEL_ARG_TYPE_CONST; 79 if (type_qualifier.find("restrict") != std::string::npos) 80 cl_type_qualifier |= CL_KERNEL_ARG_TYPE_RESTRICT; 81 if (type_qualifier.find("volatile") != std::string::npos) 82 cl_type_qualifier |= CL_KERNEL_ARG_TYPE_VOLATILE; 83 84 cl_kernel_arg_address_qualifier cl_address_qualifier = 85 CL_KERNEL_ARG_ADDRESS_PRIVATE; 86 if (address_qualifier == 1) 87 cl_address_qualifier = 
CL_KERNEL_ARG_ADDRESS_GLOBAL; 88 else if (address_qualifier == 2) 89 cl_address_qualifier = CL_KERNEL_ARG_ADDRESS_CONSTANT; 90 else if (address_qualifier == 3) 91 cl_address_qualifier = CL_KERNEL_ARG_ADDRESS_LOCAL; 92 93 cl_kernel_arg_access_qualifier cl_access_qualifier = 94 CL_KERNEL_ARG_ACCESS_NONE; 95 if (access_qualifier == "read_only") 96 cl_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_ONLY; 97 else if (access_qualifier == "write_only") 98 cl_access_qualifier = CL_KERNEL_ARG_ACCESS_WRITE_ONLY; 99 else if (access_qualifier == "read_write") 100 cl_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_WRITE; 101 102 return module::arg_info(arg_name, type_name, cl_type_qualifier, 103 cl_address_qualifier, cl_access_qualifier); 104 } 105 106 std::vector<size_t> get_reqd_work_group_size(const Module & mod,const std::string & kernel_name)107 get_reqd_work_group_size(const Module &mod, 108 const std::string &kernel_name) { 109 const Function &f = *mod.getFunction(kernel_name); 110 auto vector_metadata = get_uint_vector_kernel_metadata(f, "reqd_work_group_size"); 111 112 return vector_metadata.empty() ? 
std::vector<size_t>({0, 0, 0}) : vector_metadata; 113 } 114 115 116 std::string kernel_attributes(const Module & mod,const std::string & kernel_name)117 kernel_attributes(const Module &mod, const std::string &kernel_name) { 118 std::vector<std::string> attributes; 119 120 const Function &f = *mod.getFunction(kernel_name); 121 122 auto vec_type_hint = get_type_kernel_metadata(f, "vec_type_hint"); 123 if (!vec_type_hint.empty()) 124 attributes.emplace_back("vec_type_hint(" + vec_type_hint + ")"); 125 126 auto work_group_size_hint = get_uint_vector_kernel_metadata(f, "work_group_size_hint"); 127 if (!work_group_size_hint.empty()) { 128 std::string s = "work_group_size_hint("; 129 s += detokenize(work_group_size_hint, ","); 130 s += ")"; 131 attributes.emplace_back(s); 132 } 133 134 auto reqd_work_group_size = get_uint_vector_kernel_metadata(f, "reqd_work_group_size"); 135 if (!reqd_work_group_size.empty()) { 136 std::string s = "reqd_work_group_size("; 137 s += detokenize(reqd_work_group_size, ","); 138 s += ")"; 139 attributes.emplace_back(s); 140 } 141 142 auto nosvm = get_str_kernel_metadata(f, "nosvm"); 143 if (!nosvm.empty()) 144 attributes.emplace_back("nosvm"); 145 146 return detokenize(attributes, " "); 147 } 148 149 std::vector<module::argument> make_kernel_args(const Module & mod,const std::string & kernel_name,const clang::CompilerInstance & c)150 make_kernel_args(const Module &mod, const std::string &kernel_name, 151 const clang::CompilerInstance &c) { 152 std::vector<module::argument> args; 153 const Function &f = *mod.getFunction(kernel_name); 154 ::llvm::DataLayout dl(&mod); 155 const auto size_type = 156 dl.getSmallestLegalIntType(mod.getContext(), sizeof(cl_uint) * 8); 157 158 for (const auto &arg : f.args()) { 159 const auto arg_type = arg.getType(); 160 161 // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data 162 // type that is not a power of two bytes in size must be 163 // aligned to the next larger power of two. 
164 // This rule applies to built-in types only, not structs or unions." 165 const unsigned arg_api_size = dl.getTypeAllocSize(arg_type); 166 167 const unsigned target_size = dl.getTypeStoreSize(arg_type); 168 const unsigned target_align = dl.getABITypeAlignment(arg_type); 169 170 const auto type_name = get_str_argument_metadata(f, arg, 171 "kernel_arg_type"); 172 if (type_name == "image2d_t" || type_name == "image3d_t") { 173 // Image. 174 const auto access_qual = get_str_argument_metadata( 175 f, arg, "kernel_arg_access_qual"); 176 args.emplace_back(get_image_type(type_name, access_qual), 177 target_size, target_size, 178 target_align, module::argument::zero_ext); 179 180 } else if (type_name == "sampler_t") { 181 args.emplace_back(module::argument::sampler, arg_api_size, 182 target_size, target_align, 183 module::argument::zero_ext); 184 185 } else if (type_name == "__llvm_image_size") { 186 // Image size implicit argument. 187 args.emplace_back(module::argument::scalar, sizeof(cl_uint), 188 dl.getTypeStoreSize(size_type), 189 dl.getABITypeAlignment(size_type), 190 module::argument::zero_ext, 191 module::argument::image_size); 192 193 } else if (type_name == "__llvm_image_format") { 194 // Image format implicit argument. 195 args.emplace_back(module::argument::scalar, sizeof(cl_uint), 196 dl.getTypeStoreSize(size_type), 197 dl.getABITypeAlignment(size_type), 198 module::argument::zero_ext, 199 module::argument::image_format); 200 201 } else { 202 // Other types. 203 const auto actual_type = 204 isa< ::llvm::PointerType>(arg_type) && arg.hasByValAttr() ? 
205 cast< ::llvm::PointerType>(arg_type)->getElementType() : arg_type; 206 207 if (actual_type->isPointerTy()) { 208 const unsigned address_space = 209 cast< ::llvm::PointerType>(actual_type)->getAddressSpace(); 210 211 const auto &map = c.getTarget().getAddressSpaceMap(); 212 const auto offset = 213 static_cast<unsigned>(clang::LangAS::opencl_local); 214 if (address_space == map[offset]) { 215 args.emplace_back(module::argument::local, arg_api_size, 216 target_size, target_align, 217 module::argument::zero_ext); 218 } else { 219 // XXX: Correctly handle constant address space. There is no 220 // way for r600g to pass a handle for constant buffers back 221 // to clover like it can for global buffers, so 222 // creating constant arguments will break r600g. For now, 223 // continue treating constant buffers as global buffers 224 // until we can come up with a way to create handles for 225 // constant buffers. 226 args.emplace_back(module::argument::global, arg_api_size, 227 target_size, target_align, 228 module::argument::zero_ext); 229 } 230 231 } else { 232 const bool needs_sign_ext = f.getAttributes().hasAttribute( 233 arg.getArgNo() + 1, ::llvm::Attribute::SExt); 234 235 args.emplace_back(module::argument::scalar, arg_api_size, 236 target_size, target_align, 237 (needs_sign_ext ? module::argument::sign_ext : 238 module::argument::zero_ext)); 239 } 240 241 // Add kernel argument infos if built with -cl-kernel-arg-info. 242 if (c.getCodeGenOpts().EmitOpenCLArgMetadata) { 243 args.back().info = create_arg_info( 244 get_str_argument_metadata(f, arg, "kernel_arg_name"), 245 type_name, 246 get_str_argument_metadata(f, arg, "kernel_arg_type_qual"), 247 get_uint_argument_metadata(f, arg, "kernel_arg_addr_space"), 248 get_str_argument_metadata(f, arg, "kernel_arg_access_qual")); 249 } 250 } 251 } 252 253 // Append implicit arguments. 
XXX - The types, ordering and 254 // vector size of the implicit arguments should depend on the 255 // target according to the selected calling convention. 256 args.emplace_back(module::argument::scalar, sizeof(cl_uint), 257 dl.getTypeStoreSize(size_type), 258 dl.getABITypeAlignment(size_type), 259 module::argument::zero_ext, 260 module::argument::grid_dimension); 261 262 args.emplace_back(module::argument::scalar, sizeof(cl_uint), 263 dl.getTypeStoreSize(size_type), 264 dl.getABITypeAlignment(size_type), 265 module::argument::zero_ext, 266 module::argument::grid_offset); 267 268 return args; 269 } 270 271 module::section make_text_section(const std::vector<char> & code)272 make_text_section(const std::vector<char> &code) { 273 const pipe_binary_program_header header { uint32_t(code.size()) }; 274 module::section text { 0, module::section::text_executable, 275 header.num_bytes, {} }; 276 277 text.data.insert(text.data.end(), reinterpret_cast<const char *>(&header), 278 reinterpret_cast<const char *>(&header) + sizeof(header)); 279 text.data.insert(text.data.end(), code.begin(), code.end()); 280 281 return text; 282 } 283 } 284 285 module 286 clover::llvm::build_module_common(const Module &mod, 287 const std::vector<char> &code, 288 const std::map<std::string, 289 unsigned> &offsets, 290 const clang::CompilerInstance &c) { 291 module m; 292 293 for (const auto &llvm_name : map(std::mem_fn(&Function::getName), 294 get_kernels(mod))) { 295 const ::std::string name(llvm_name); 296 if (offsets.count(name)) 297 m.syms.emplace_back(name, kernel_attributes(mod, name), 298 get_reqd_work_group_size(mod, name), 299 0, offsets.at(name), 300 make_kernel_args(mod, name, c)); 301 } 302 303 m.secs.push_back(make_text_section(code)); 304 return m; 305 } 306