• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The Amber Authors.
2 // Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #include "src/pipeline.h"
17 
18 #include <algorithm>
19 #include <cstring>
20 #include <limits>
21 #include <set>
22 
23 #include "src/make_unique.h"
24 #include "src/type_parser.h"
25 
26 namespace amber {
namespace {

// Formats given to the colour and depth/stencil buffers that the pipeline
// generates on its own (see the GenerateDefault*AttachmentBuffer methods).
const char* const kDefaultColorBufferFormat = "B8G8R8A8_UNORM";
const char* const kDefaultDepthBufferFormat = "D32_SFLOAT_S8_UINT";

// Layout of an OpenCL literal sampler mask.
//
// Bit 0 selects the coordinates mode.
constexpr uint32_t kOpenCLNormalizedCoordsBit = 1;
// Bits 1, 2 and 3 hold the address mode.
constexpr uint32_t kOpenCLAddressModeBits = 0xe;
// Values the address mode bits can take.
constexpr uint32_t kOpenCLAddressModeNone = 0;
constexpr uint32_t kOpenCLAddressModeClampToEdge = 2;
constexpr uint32_t kOpenCLAddressModeClamp = 4;
constexpr uint32_t kOpenCLAddressModeRepeat = 6;
constexpr uint32_t kOpenCLAddressModeMirroredRepeat = 8;
// Bits selecting the filter mode.
constexpr uint32_t kOpenCLFilterModeNearestBit = 0x10;
constexpr uint32_t kOpenCLFilterModeLinearBit = 0x20;

}  // namespace
47 
// Names assigned to the buffers this class creates itself: the default colour
// attachment, the default depth/stencil attachment and the push constant
// buffer built by CreatePushConstantBuffer().
const char* Pipeline::kGeneratedColorBuffer = "framebuffer";
const char* Pipeline::kGeneratedDepthBuffer = "depth_buffer";
const char* Pipeline::kGeneratedPushConstantBuffer = "push_constant_buffer";
51 
ShaderInfo(Shader * shader,ShaderType type)52 Pipeline::ShaderInfo::ShaderInfo(Shader* shader, ShaderType type)
53     : shader_(shader),
54       shader_type_(type),
55       entry_point_("main"),
56       required_subgroup_size_setting_(RequiredSubgroupSizeSetting::kNotSet),
57       required_subgroup_size_(0),
58       varying_subgroup_size_(false),
59       require_full_subgroups_(false) {}
60 
// Copying a ShaderInfo uses the compiler-generated member-wise copy.
Pipeline::ShaderInfo::ShaderInfo(const ShaderInfo&) = default;

// Compiler-generated destructor, defined out of line in this file.
Pipeline::ShaderInfo::~ShaderInfo() = default;
64 
Pipeline(PipelineType type)65 Pipeline::Pipeline(PipelineType type) : pipeline_type_(type) {
66 }
67 
68 Pipeline::~Pipeline() = default;
69 
Clone() const70 std::unique_ptr<Pipeline> Pipeline::Clone() const {
71   auto clone = MakeUnique<Pipeline>(pipeline_type_);
72   clone->shaders_ = shaders_;
73   clone->color_attachments_ = color_attachments_;
74   clone->vertex_buffers_ = vertex_buffers_;
75   clone->buffers_ = buffers_;
76   clone->depth_stencil_buffer_ = depth_stencil_buffer_;
77   clone->index_buffer_ = index_buffer_;
78   clone->fb_width_ = fb_width_;
79   clone->fb_height_ = fb_height_;
80   clone->set_arg_values_ = set_arg_values_;
81   clone->pipeline_data_ = pipeline_data_;
82 
83   if (!opencl_pod_buffers_.empty()) {
84     // Generate specific buffers for the clone.
85     clone->GenerateOpenCLPodBuffers();
86   }
87 
88   return clone;
89 }
90 
AddShader(Shader * shader,ShaderType shader_type)91 Result Pipeline::AddShader(Shader* shader, ShaderType shader_type) {
92   if (!shader)
93     return Result("shader can not be null when attached to pipeline");
94 
95   if (pipeline_type_ == PipelineType::kCompute &&
96       shader_type != kShaderTypeCompute) {
97     return Result("only compute shaders allowed in a compute pipeline");
98   }
99   if (pipeline_type_ == PipelineType::kGraphics &&
100       shader_type == kShaderTypeCompute) {
101     return Result("can not add a compute shader to a graphics pipeline");
102   }
103 
104   if (pipeline_type_ != PipelineType::kRayTracing) {
105     for (auto& info : shaders_) {
106       const auto* is = info.GetShader();
107       if (is == shader)
108         return Result("can not add duplicate shader to pipeline");
109       if (is->GetType() == shader_type) {
110         info.SetShader(shader);
111         return {};
112       }
113     }
114   }
115 
116   shaders_.emplace_back(shader, shader_type);
117   return {};
118 }
119 
SetShaderOptimizations(const Shader * shader,const std::vector<std::string> & opts)120 Result Pipeline::SetShaderOptimizations(const Shader* shader,
121                                         const std::vector<std::string>& opts) {
122   if (!shader)
123     return Result("invalid shader specified for optimizations");
124 
125   std::set<std::string> seen;
126   for (const auto& opt : opts) {
127     if (seen.count(opt) != 0)
128       return Result("duplicate optimization flag (" + opt + ") set on shader");
129 
130     seen.insert(opt);
131   }
132 
133   for (auto& info : shaders_) {
134     const auto* is = info.GetShader();
135     if (is == shader) {
136       info.SetShaderOptimizations(opts);
137       return {};
138     }
139   }
140 
141   return Result("unknown shader specified for optimizations: " +
142                 shader->GetName());
143 }
144 
SetShaderCompileOptions(const Shader * shader,const std::vector<std::string> & opts)145 Result Pipeline::SetShaderCompileOptions(const Shader* shader,
146                                          const std::vector<std::string>& opts) {
147   if (!shader)
148     return Result("invalid shader specified for compile options");
149 
150   for (auto& info : shaders_) {
151     const auto* is = info.GetShader();
152     if (is == shader) {
153       info.SetCompileOptions(opts);
154       return {};
155     }
156   }
157 
158   return Result("unknown shader specified for compile options: " +
159                 shader->GetName());
160 }
161 
SetShaderRequiredSubgroupSize(const Shader * shader,const ShaderInfo::RequiredSubgroupSizeSetting setting,const uint32_t size)162 Result Pipeline::SetShaderRequiredSubgroupSize(
163     const Shader* shader,
164     const ShaderInfo::RequiredSubgroupSizeSetting setting,
165     const uint32_t size) {
166   if (!shader)
167     return Result("invalid shader specified for  required subgroup size");
168 
169   for (auto& info : shaders_) {
170     const auto* is = info.GetShader();
171     if (is == shader) {
172       info.SetRequiredSubgroupSizeSetting(setting, size);
173       return {};
174     }
175   }
176 
177   return Result("unknown shader specified for required subgroup size: " +
178                 shader->GetName());
179 }
180 
SetShaderRequiredSubgroupSize(const Shader * shader,const uint32_t subgroupSize)181 Result Pipeline::SetShaderRequiredSubgroupSize(const Shader* shader,
182                                                const uint32_t subgroupSize) {
183   const bool isPow2 =
184       subgroupSize > 0 && (subgroupSize & (subgroupSize - 1)) == 0;
185   if (subgroupSize == 0 || subgroupSize > 128 || !isPow2) {
186     return Result("invalid required subgroup size " +
187                   std::to_string(subgroupSize) + " specified for shader name " +
188                   shader->GetName());
189   }
190   const ShaderInfo::RequiredSubgroupSizeSetting setting =
191       ShaderInfo::RequiredSubgroupSizeSetting::kSetToSpecificSize;
192   return SetShaderRequiredSubgroupSize(shader, setting, subgroupSize);
193 }
194 
SetShaderRequiredSubgroupSizeToMinimum(const Shader * shader)195 Result Pipeline::SetShaderRequiredSubgroupSizeToMinimum(const Shader* shader) {
196   const ShaderInfo::RequiredSubgroupSizeSetting subgroupSizeSetting =
197       ShaderInfo::RequiredSubgroupSizeSetting::kSetToMinimumSize;
198   return SetShaderRequiredSubgroupSize(shader, subgroupSizeSetting, 0);
199 }
200 
SetShaderRequiredSubgroupSizeToMaximum(const Shader * shader)201 Result Pipeline::SetShaderRequiredSubgroupSizeToMaximum(const Shader* shader) {
202   const ShaderInfo::RequiredSubgroupSizeSetting subgroupSizeSetting =
203       ShaderInfo::RequiredSubgroupSizeSetting::kSetToMaximumSize;
204   return SetShaderRequiredSubgroupSize(shader, subgroupSizeSetting, 0);
205 }
206 
SetShaderVaryingSubgroupSize(const Shader * shader,const bool isSet)207 Result Pipeline::SetShaderVaryingSubgroupSize(const Shader* shader,
208                                               const bool isSet) {
209   if (!shader)
210     return Result("invalid shader specified for varying subgroup size");
211 
212   for (auto& info : shaders_) {
213     const auto* is = info.GetShader();
214     if (is == shader) {
215       info.SetVaryingSubgroupSize(isSet);
216       return {};
217     }
218   }
219 
220   return Result("unknown shader specified for varying subgroup size: " +
221                 shader->GetName());
222 }
223 
SetShaderRequireFullSubgroups(const Shader * shader,const bool isSet)224 Result Pipeline::SetShaderRequireFullSubgroups(const Shader* shader,
225                                                const bool isSet) {
226   if (!shader)
227     return Result("invalid shader specified for optimizations");
228 
229   for (auto& info : shaders_) {
230     const auto* is = info.GetShader();
231     if (is == shader) {
232       info.SetRequireFullSubgroups(isSet);
233       return {};
234     }
235   }
236 
237   return Result("unknown shader specified for optimizations: " +
238                 shader->GetName());
239 }
240 
SetShaderEntryPoint(const Shader * shader,const std::string & name)241 Result Pipeline::SetShaderEntryPoint(const Shader* shader,
242                                      const std::string& name) {
243   if (!shader)
244     return Result("invalid shader specified for entry point");
245   if (name.empty())
246     return Result("entry point should not be blank");
247 
248   for (auto& info : shaders_) {
249     if (info.GetShader() == shader) {
250       if (info.GetEntryPoint() != "main")
251         return Result("multiple entry points given for the same shader");
252 
253       info.SetEntryPoint(name);
254       return {};
255     }
256   }
257 
258   return Result("unknown shader specified for entry point: " +
259                 shader->GetName());
260 }
261 
SetShaderType(const Shader * shader,ShaderType type)262 Result Pipeline::SetShaderType(const Shader* shader, ShaderType type) {
263   if (!shader)
264     return Result("invalid shader specified for shader type");
265 
266   for (auto& info : shaders_) {
267     if (info.GetShader() == shader) {
268       info.SetShaderType(type);
269       return {};
270     }
271   }
272 
273   return Result("unknown shader specified for shader type: " +
274                 shader->GetName());
275 }
276 
Validate() const277 Result Pipeline::Validate() const {
278   for (const auto& attachment : color_attachments_) {
279     if (attachment.buffer->ElementCount() !=
280         (fb_width_ << attachment.base_mip_level) *
281             (fb_height_ << attachment.base_mip_level)) {
282       return Result(
283           "shared framebuffer must have same size over all PIPELINES");
284     }
285   }
286 
287   if (depth_stencil_buffer_.buffer &&
288       depth_stencil_buffer_.buffer->ElementCount() != fb_width_ * fb_height_) {
289     return Result("shared depth buffer must have same size over all PIPELINES");
290   }
291 
292   for (auto& buf : GetBuffers()) {
293     if (buf.buffer->GetFormat() == nullptr) {
294       return Result("buffer (" + std::to_string(buf.descriptor_set) + ":" +
295                     std::to_string(buf.binding) + ") requires a format");
296     }
297   }
298 
299   if (pipeline_type_ == PipelineType::kRayTracing)
300     return ValidateRayTracing();
301   else if (pipeline_type_ == PipelineType::kGraphics)
302     return ValidateGraphics();
303 
304   return ValidateCompute();
305 }
306 
ValidateRayTracing() const307 Result Pipeline::ValidateRayTracing() const {
308   if (shader_groups_.empty() && shaders_.empty() && tlases_.empty())
309     return Result("Shader groups are missing");
310 
311   return {};
312 }
313 
ValidateGraphics() const314 Result Pipeline::ValidateGraphics() const {
315   if (color_attachments_.empty())
316     return Result("PIPELINE missing color attachment");
317 
318   bool found_vertex = false;
319   for (const auto& info : shaders_) {
320     const auto* s = info.GetShader();
321     if (s->GetType() == kShaderTypeVertex) {
322       found_vertex = true;
323       break;
324     }
325   }
326 
327   if (!found_vertex)
328     return Result("graphics pipeline requires a vertex shader");
329 
330   for (const auto& att : color_attachments_) {
331     auto width = att.buffer->GetWidth();
332     auto height = att.buffer->GetHeight();
333     for (uint32_t level = 1; level < att.buffer->GetMipLevels(); level++) {
334       width >>= 1;
335       if (width == 0)
336         return Result("color attachment with " +
337                       std::to_string(att.buffer->GetMipLevels()) +
338                       " mip levels would have zero width for level " +
339                       std::to_string(level));
340       height >>= 1;
341       if (height == 0)
342         return Result("color attachment with " +
343                       std::to_string(att.buffer->GetMipLevels()) +
344                       " mip levels would have zero height for level " +
345                       std::to_string(level));
346     }
347   }
348 
349   return {};
350 }
351 
ValidateCompute() const352 Result Pipeline::ValidateCompute() const {
353   if (shaders_.empty())
354     return Result("compute pipeline requires a compute shader");
355 
356   return {};
357 }
358 
UpdateFramebufferSizes()359 void Pipeline::UpdateFramebufferSizes() {
360   uint32_t size = fb_width_ * fb_height_;
361   if (size == 0)
362     return;
363 
364   for (auto& attachment : color_attachments_) {
365     auto mip0_width = fb_width_ << attachment.base_mip_level;
366     auto mip0_height = fb_height_ << attachment.base_mip_level;
367     attachment.buffer->SetWidth(mip0_width);
368     attachment.buffer->SetHeight(mip0_height);
369     attachment.buffer->SetElementCount(mip0_width * mip0_height);
370   }
371 
372   if (depth_stencil_buffer_.buffer) {
373     depth_stencil_buffer_.buffer->SetWidth(fb_width_);
374     depth_stencil_buffer_.buffer->SetHeight(fb_height_);
375     depth_stencil_buffer_.buffer->SetElementCount(size);
376   }
377 }
378 
AddColorAttachment(Buffer * buf,uint32_t location,uint32_t base_mip_level)379 Result Pipeline::AddColorAttachment(Buffer* buf,
380                                     uint32_t location,
381                                     uint32_t base_mip_level) {
382   for (const auto& attachment : color_attachments_) {
383     if (attachment.location == location)
384       return Result("can not bind two color buffers to the same LOCATION");
385     if (attachment.buffer == buf)
386       return Result("color buffer may only be bound to a PIPELINE once");
387   }
388 
389   color_attachments_.push_back(BufferInfo{buf});
390 
391   auto& info = color_attachments_.back();
392   info.location = location;
393   info.type = BufferType::kColor;
394   info.base_mip_level = base_mip_level;
395   auto mip0_width = fb_width_ << base_mip_level;
396   auto mip0_height = fb_height_ << base_mip_level;
397   buf->SetWidth(mip0_width);
398   buf->SetHeight(mip0_height);
399   buf->SetElementCount(mip0_width * mip0_height);
400 
401   return {};
402 }
403 
AddResolveTarget(Buffer * buf)404 Result Pipeline::AddResolveTarget(Buffer* buf) {
405   resolve_targets_.push_back(BufferInfo{buf});
406 
407   auto& info = resolve_targets_.back();
408   info.type = BufferType::kResolve;
409   buf->SetWidth(fb_width_);
410   buf->SetHeight(fb_height_);
411   buf->SetElementCount(fb_width_ * fb_height_);
412 
413   return {};
414 }
415 
GetLocationForColorAttachment(Buffer * buf,uint32_t * loc) const416 Result Pipeline::GetLocationForColorAttachment(Buffer* buf,
417                                                uint32_t* loc) const {
418   for (const auto& info : color_attachments_) {
419     if (info.buffer == buf) {
420       *loc = info.location;
421       return {};
422     }
423   }
424   return Result("Unable to find requested buffer");
425 }
426 
SetDepthStencilBuffer(Buffer * buf)427 Result Pipeline::SetDepthStencilBuffer(Buffer* buf) {
428   if (depth_stencil_buffer_.buffer != nullptr)
429     return Result("can only bind one depth/stencil buffer in a PIPELINE");
430 
431   depth_stencil_buffer_.buffer = buf;
432   depth_stencil_buffer_.type = BufferType::kDepthStencil;
433 
434   buf->SetWidth(fb_width_);
435   buf->SetHeight(fb_height_);
436   buf->SetElementCount(fb_width_ * fb_height_);
437   return {};
438 }
439 
SetIndexBuffer(Buffer * buf)440 Result Pipeline::SetIndexBuffer(Buffer* buf) {
441   if (index_buffer_ != nullptr)
442     return Result("can only bind one INDEX_DATA buffer in a pipeline");
443 
444   index_buffer_ = buf;
445   return {};
446 }
447 
AddVertexBuffer(Buffer * buf,uint32_t location,InputRate rate,Format * format,uint32_t offset,uint32_t stride)448 Result Pipeline::AddVertexBuffer(Buffer* buf,
449                                  uint32_t location,
450                                  InputRate rate,
451                                  Format* format,
452                                  uint32_t offset,
453                                  uint32_t stride) {
454   for (const auto& vtex : vertex_buffers_) {
455     if (vtex.location == location)
456       return Result("can not bind two vertex buffers to the same LOCATION");
457   }
458 
459   vertex_buffers_.push_back(BufferInfo{buf});
460   vertex_buffers_.back().location = location;
461   vertex_buffers_.back().type = BufferType::kVertex;
462   vertex_buffers_.back().input_rate = rate;
463   vertex_buffers_.back().format = format;
464   vertex_buffers_.back().offset = offset;
465   vertex_buffers_.back().stride = stride;
466   return {};
467 }
468 
SetPushConstantBuffer(Buffer * buf)469 Result Pipeline::SetPushConstantBuffer(Buffer* buf) {
470   if (push_constant_buffer_.buffer != nullptr)
471     return Result("can only bind one push constant buffer in a PIPELINE");
472 
473   push_constant_buffer_.buffer = buf;
474   push_constant_buffer_.type = BufferType::kPushConstant;
475   return {};
476 }
477 
CreatePushConstantBuffer()478 Result Pipeline::CreatePushConstantBuffer() {
479   if (push_constant_buffer_.buffer != nullptr)
480     return Result("can only bind one push constant buffer in a PIPELINE");
481 
482   TypeParser parser;
483   auto type = parser.Parse("R8_UINT");
484   auto fmt = MakeUnique<Format>(type.get());
485 
486   std::unique_ptr<Buffer> buf = MakeUnique<Buffer>();
487   buf->SetName(kGeneratedPushConstantBuffer);
488   buf->SetFormat(fmt.get());
489 
490   push_constant_buffer_.buffer = buf.get();
491   push_constant_buffer_.type = BufferType::kPushConstant;
492 
493   formats_.push_back(std::move(fmt));
494   types_.push_back(std::move(type));
495   opencl_push_constants_ = std::move(buf);
496 
497   return {};
498 }
499 
GenerateDefaultColorAttachmentBuffer()500 std::unique_ptr<Buffer> Pipeline::GenerateDefaultColorAttachmentBuffer() {
501   TypeParser parser;
502   auto type = parser.Parse(kDefaultColorBufferFormat);
503   auto fmt = MakeUnique<Format>(type.get());
504 
505   std::unique_ptr<Buffer> buf = MakeUnique<Buffer>();
506   buf->SetName(kGeneratedColorBuffer);
507   buf->SetFormat(fmt.get());
508 
509   formats_.push_back(std::move(fmt));
510   types_.push_back(std::move(type));
511   return buf;
512 }
513 
514 std::unique_ptr<Buffer>
GenerateDefaultDepthStencilAttachmentBuffer()515 Pipeline::GenerateDefaultDepthStencilAttachmentBuffer() {
516   TypeParser parser;
517   auto type = parser.Parse(kDefaultDepthBufferFormat);
518   auto fmt = MakeUnique<Format>(type.get());
519 
520   std::unique_ptr<Buffer> buf = MakeUnique<Buffer>();
521   buf->SetName(kGeneratedDepthBuffer);
522   buf->SetFormat(fmt.get());
523 
524   formats_.push_back(std::move(fmt));
525   types_.push_back(std::move(type));
526   return buf;
527 }
528 
GetBufferForBinding(uint32_t descriptor_set,uint32_t binding) const529 Buffer* Pipeline::GetBufferForBinding(uint32_t descriptor_set,
530                                       uint32_t binding) const {
531   for (const auto& info : buffers_) {
532     if (info.descriptor_set == descriptor_set && info.binding == binding)
533       return info.buffer;
534   }
535   return nullptr;
536 }
537 
AddBuffer(Buffer * buf,BufferType type,uint32_t descriptor_set,uint32_t binding,uint32_t base_mip_level,uint32_t dynamic_offset,uint64_t descriptor_offset,uint64_t descriptor_range)538 void Pipeline::AddBuffer(Buffer* buf,
539                          BufferType type,
540                          uint32_t descriptor_set,
541                          uint32_t binding,
542                          uint32_t base_mip_level,
543                          uint32_t dynamic_offset,
544                          uint64_t descriptor_offset,
545                          uint64_t descriptor_range) {
546   buffers_.push_back(BufferInfo{buf});
547 
548   auto& info = buffers_.back();
549   info.descriptor_set = descriptor_set;
550   info.binding = binding;
551   info.type = type;
552   info.base_mip_level = base_mip_level;
553   info.dynamic_offset = dynamic_offset;
554   info.sampler = buf->GetSampler();
555   info.descriptor_offset = descriptor_offset;
556   info.descriptor_range = descriptor_range;
557 }
558 
AddBuffer(Buffer * buf,BufferType type,const std::string & arg_name)559 void Pipeline::AddBuffer(Buffer* buf,
560                          BufferType type,
561                          const std::string& arg_name) {
562   // If this buffer binding already exists, overwrite with the new buffer.
563   for (auto& info : buffers_) {
564     if (info.arg_name == arg_name) {
565       info.buffer = buf;
566       return;
567     }
568   }
569 
570   buffers_.push_back(BufferInfo{buf});
571 
572   auto& info = buffers_.back();
573   info.type = type;
574   info.arg_name = arg_name;
575   info.descriptor_set = std::numeric_limits<uint32_t>::max();
576   info.binding = std::numeric_limits<uint32_t>::max();
577   info.arg_no = std::numeric_limits<uint32_t>::max();
578   info.base_mip_level = 0;
579   info.dynamic_offset = 0;
580 }
581 
AddBuffer(Buffer * buf,BufferType type,uint32_t arg_no)582 void Pipeline::AddBuffer(Buffer* buf, BufferType type, uint32_t arg_no) {
583   // If this buffer binding already exists, overwrite with the new buffer.
584   for (auto& info : buffers_) {
585     if (info.arg_no == arg_no) {
586       info.buffer = buf;
587       return;
588     }
589   }
590 
591   buffers_.push_back(BufferInfo{buf});
592 
593   auto& info = buffers_.back();
594   info.type = type;
595   info.arg_no = arg_no;
596   info.descriptor_set = std::numeric_limits<uint32_t>::max();
597   info.binding = std::numeric_limits<uint32_t>::max();
598   info.base_mip_level = 0;
599   info.dynamic_offset = 0;
600 }
601 
ClearBuffers(uint32_t descriptor_set,uint32_t binding)602 void Pipeline::ClearBuffers(uint32_t descriptor_set, uint32_t binding) {
603   buffers_.erase(
604       std::remove_if(buffers_.begin(), buffers_.end(),
605                      [descriptor_set, binding](BufferInfo& info) -> bool {
606                        return (info.descriptor_set == descriptor_set &&
607                                info.binding == binding);
608                      }),
609       buffers_.end());
610 }
611 
AddSampler(Sampler * sampler,uint32_t descriptor_set,uint32_t binding)612 void Pipeline::AddSampler(Sampler* sampler,
613                           uint32_t descriptor_set,
614                           uint32_t binding) {
615   samplers_.push_back(SamplerInfo{sampler});
616 
617   auto& info = samplers_.back();
618   info.descriptor_set = descriptor_set;
619   info.binding = binding;
620   info.mask = std::numeric_limits<uint32_t>::max();
621 }
622 
AddSampler(Sampler * sampler,const std::string & arg_name)623 void Pipeline::AddSampler(Sampler* sampler, const std::string& arg_name) {
624   for (auto& info : samplers_) {
625     if (info.arg_name == arg_name) {
626       info.sampler = sampler;
627       return;
628     }
629   }
630 
631   samplers_.push_back(SamplerInfo{sampler});
632 
633   auto& info = samplers_.back();
634   info.arg_name = arg_name;
635   info.descriptor_set = std::numeric_limits<uint32_t>::max();
636   info.binding = std::numeric_limits<uint32_t>::max();
637   info.arg_no = std::numeric_limits<uint32_t>::max();
638   info.mask = std::numeric_limits<uint32_t>::max();
639 }
640 
AddSampler(Sampler * sampler,uint32_t arg_no)641 void Pipeline::AddSampler(Sampler* sampler, uint32_t arg_no) {
642   for (auto& info : samplers_) {
643     if (info.arg_no == arg_no) {
644       info.sampler = sampler;
645       return;
646     }
647   }
648 
649   samplers_.push_back(SamplerInfo{sampler});
650 
651   auto& info = samplers_.back();
652   info.arg_no = arg_no;
653   info.descriptor_set = std::numeric_limits<uint32_t>::max();
654   info.binding = std::numeric_limits<uint32_t>::max();
655   info.mask = std::numeric_limits<uint32_t>::max();
656 }
657 
AddSampler(uint32_t mask,uint32_t descriptor_set,uint32_t binding)658 void Pipeline::AddSampler(uint32_t mask,
659                           uint32_t descriptor_set,
660                           uint32_t binding) {
661   samplers_.push_back(SamplerInfo{nullptr});
662 
663   auto& info = samplers_.back();
664   info.arg_name = "";
665   info.arg_no = std::numeric_limits<uint32_t>::max();
666   info.mask = mask;
667   info.descriptor_set = descriptor_set;
668   info.binding = binding;
669 }
670 
AddTLAS(TLAS * tlas,uint32_t descriptor_set,uint32_t binding)671 void Pipeline::AddTLAS(TLAS* tlas, uint32_t descriptor_set, uint32_t binding) {
672   tlases_.push_back(TLASInfo(tlas));
673 
674   auto& info = tlases_.back();
675 
676   info.descriptor_set = descriptor_set;
677   info.binding = binding;
678 }
679 
ClearSamplers(uint32_t descriptor_set,uint32_t binding)680 void Pipeline::ClearSamplers(uint32_t descriptor_set, uint32_t binding) {
681   samplers_.erase(
682       std::remove_if(samplers_.begin(), samplers_.end(),
683                      [descriptor_set, binding](SamplerInfo& info) -> bool {
684                        return (info.descriptor_set == descriptor_set &&
685                                info.binding == binding);
686                      }),
687       samplers_.end());
688 }
689 
UpdateOpenCLBufferBindings()690 Result Pipeline::UpdateOpenCLBufferBindings() {
691   if (!IsCompute() || GetShaders().empty() ||
692       GetShaders()[0].GetShader()->GetFormat() != kShaderFormatOpenCLC) {
693     return {};
694   }
695 
696   const auto& shader_info = GetShaders()[0];
697   const auto& descriptor_map = shader_info.GetDescriptorMap();
698   if (descriptor_map.empty())
699     return {};
700 
701   const auto iter = descriptor_map.find(shader_info.GetEntryPoint());
702   if (iter == descriptor_map.end())
703     return {};
704 
705   for (auto& info : samplers_) {
706     if (info.descriptor_set == std::numeric_limits<uint32_t>::max() &&
707         info.binding == std::numeric_limits<uint32_t>::max()) {
708       for (const auto& entry : iter->second) {
709         if (entry.arg_name == info.arg_name ||
710             entry.arg_ordinal == info.arg_no) {
711           if (entry.kind !=
712               Pipeline::ShaderInfo::DescriptorMapEntry::Kind::SAMPLER) {
713             return Result("Sampler bound to non-sampler kernel arg");
714           }
715           info.descriptor_set = entry.descriptor_set;
716           info.binding = entry.binding;
717         }
718       }
719     }
720   }
721 
722   for (auto& info : buffers_) {
723     if (info.descriptor_set == std::numeric_limits<uint32_t>::max() &&
724         info.binding == std::numeric_limits<uint32_t>::max()) {
725       for (const auto& entry : iter->second) {
726         if (entry.arg_name == info.arg_name ||
727             entry.arg_ordinal == info.arg_no) {
728           // Buffer storage class consistency checks.
729           if (info.type == BufferType::kUnknown) {
730             // Set the appropriate buffer type.
731             switch (entry.kind) {
732               case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::UBO:
733               case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD_UBO:
734                 info.type = BufferType::kUniform;
735                 break;
736               case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::SSBO:
737               case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD:
738                 info.type = BufferType::kStorage;
739                 break;
740               case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::RO_IMAGE:
741                 info.type = BufferType::kSampledImage;
742                 break;
743               case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::WO_IMAGE:
744                 info.type = BufferType::kStorageImage;
745                 break;
746               default:
747                 return Result("Unhandled buffer type for OPENCL-C shader");
748             }
749           } else if (info.type == BufferType::kUniform) {
750             if (entry.kind !=
751                     Pipeline::ShaderInfo::DescriptorMapEntry::Kind::UBO &&
752                 entry.kind !=
753                     Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD_UBO) {
754               return Result("Buffer " + info.buffer->GetName() +
755                             " must be a uniform binding");
756             }
757           } else if (info.type == BufferType::kStorage) {
758             if (entry.kind !=
759                     Pipeline::ShaderInfo::DescriptorMapEntry::Kind::SSBO &&
760                 entry.kind !=
761                     Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD) {
762               return Result("Buffer " + info.buffer->GetName() +
763                             " must be a storage binding");
764             }
765           } else if (info.type == BufferType::kSampledImage) {
766             if (entry.kind !=
767                 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::RO_IMAGE) {
768               return Result("Buffer " + info.buffer->GetName() +
769                             " must be a read-only image binding");
770             }
771           } else if (info.type == BufferType::kStorageImage) {
772             if (entry.kind !=
773                 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::WO_IMAGE) {
774               return Result("Buffer " + info.buffer->GetName() +
775                             " must be a write-only image binding");
776             }
777           } else {
778             return Result("Unhandled buffer type for OPENCL-C shader");
779           }
780           info.descriptor_set = entry.descriptor_set;
781           info.binding = entry.binding;
782         }
783       }
784     }
785   }
786 
787   return {};
788 }
789 
GenerateOpenCLPodBuffers()790 Result Pipeline::GenerateOpenCLPodBuffers() {
791   if (!IsCompute() || GetShaders().empty() ||
792       GetShaders()[0].GetShader()->GetFormat() != kShaderFormatOpenCLC) {
793     return {};
794   }
795 
796   const auto& shader_info = GetShaders()[0];
797   const auto& descriptor_map = shader_info.GetDescriptorMap();
798   if (descriptor_map.empty())
799     return {};
800 
801   const auto iter = descriptor_map.find(shader_info.GetEntryPoint());
802   if (iter == descriptor_map.end())
803     return {};
804 
805   // For each SET command, do the following:
806   // 1. Find the descriptor map entry for that argument.
807   // 2. Find or create the buffer for the descriptor set and binding pair.
808   // 3. Write the data for the SET command at the right offset.
809   for (const auto& arg_info : SetArgValues()) {
810     uint32_t descriptor_set = std::numeric_limits<uint32_t>::max();
811     uint32_t binding = std::numeric_limits<uint32_t>::max();
812     uint32_t offset = 0;
813     uint32_t arg_size = 0;
814     bool uses_name = !arg_info.name.empty();
815     Pipeline::ShaderInfo::DescriptorMapEntry::Kind kind =
816         Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD;
817     for (const auto& entry : iter->second) {
818       if (entry.kind != Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD &&
819           entry.kind !=
820               Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD_UBO &&
821           entry.kind != Pipeline::ShaderInfo::DescriptorMapEntry::Kind::
822                             POD_PUSHCONSTANT) {
823         continue;
824       }
825 
826       // Found the right entry.
827       if ((uses_name && entry.arg_name == arg_info.name) ||
828           entry.arg_ordinal == arg_info.ordinal) {
829         descriptor_set = entry.descriptor_set;
830         binding = entry.binding;
831         offset = entry.pod_offset;
832         arg_size = entry.pod_arg_size;
833         kind = entry.kind;
834         break;
835       }
836     }
837 
838     Buffer* buffer = nullptr;
839     if (kind ==
840         Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD_PUSHCONSTANT) {
841       if (GetPushConstantBuffer().buffer == nullptr) {
842         auto r = CreatePushConstantBuffer();
843         if (!r.IsSuccess())
844           return r;
845       }
846       buffer = GetPushConstantBuffer().buffer;
847     } else {
848       if (descriptor_set == std::numeric_limits<uint32_t>::max() ||
849           binding == std::numeric_limits<uint32_t>::max()) {
850         std::string message =
851             "could not find descriptor map entry for SET command: kernel " +
852             shader_info.GetEntryPoint();
853         if (uses_name) {
854           message += ", name " + arg_info.name;
855         } else {
856           message += ", number " + std::to_string(arg_info.ordinal);
857         }
858         return Result(message);
859       }
860 
861       auto buf_iter = opencl_pod_buffer_map_.lower_bound(
862           std::make_pair(descriptor_set, binding));
863       if (buf_iter == opencl_pod_buffer_map_.end() ||
864           buf_iter->first.first != descriptor_set ||
865           buf_iter->first.second != binding) {
866         // Ensure no buffer was previously bound for this descriptor set and
867         // binding pair.
868         for (const auto& buf_info : GetBuffers()) {
869           if (buf_info.descriptor_set == descriptor_set &&
870               buf_info.binding == binding) {
871             return Result("previously bound buffer " +
872                           buf_info.buffer->GetName() +
873                           " to PoD args at descriptor set " +
874                           std::to_string(descriptor_set) + " binding " +
875                           std::to_string(binding));
876           }
877         }
878 
879         // Add a new buffer for this descriptor set and binding.
880         opencl_pod_buffers_.push_back(MakeUnique<Buffer>());
881         buffer = opencl_pod_buffers_.back().get();
882         auto buffer_type =
883             kind == Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD
884                 ? BufferType::kStorage
885                 : BufferType::kUniform;
886 
887         // Use an 8-bit type because all the data in the descriptor map is
888         // byte-based and it simplifies the logic for sizing below.
889         TypeParser parser;
890         auto type = parser.Parse("R8_UINT");
891         auto fmt = MakeUnique<Format>(type.get());
892         buffer->SetFormat(fmt.get());
893         formats_.push_back(std::move(fmt));
894         types_.push_back(std::move(type));
895 
896         buffer->SetName(GetName() + "_pod_buffer_" +
897                         std::to_string(descriptor_set) + "_" +
898                         std::to_string(binding));
899         opencl_pod_buffer_map_.insert(
900             buf_iter,
901             std::make_pair(std::make_pair(descriptor_set, binding), buffer));
902         AddBuffer(buffer, buffer_type, descriptor_set, binding, 0, 0, 0, ~0ULL);
903       } else {
904         buffer = buf_iter->second;
905       }
906 
907       // Resize if necessary.
908       if (buffer->ValueCount() < offset + arg_size) {
909         buffer->SetSizeInElements(offset + arg_size);
910       }
911 
912       // Check the data size.
913       if (arg_size != arg_info.fmt->SizeInBytes()) {
914         std::string message = "SET command uses incorrect data size: kernel " +
915                               shader_info.GetEntryPoint();
916         if (uses_name) {
917           message += ", name " + arg_info.name;
918         } else {
919           message += ", number " + std::to_string(arg_info.ordinal);
920         }
921         return Result(message);
922       }
923     }
924 
925     // Convert the argument value into bytes. Currently, only scalar arguments
926     // are supported.
927     const auto arg_byte_size = arg_info.fmt->SizeInBytes();
928     std::vector<Value> data_bytes;
929     for (uint32_t i = 0; i < arg_byte_size; ++i) {
930       Value v;
931       if (arg_info.value.IsFloat()) {
932         if (arg_byte_size == sizeof(double)) {
933           union {
934             uint64_t u;
935             double d;
936           } u;
937           u.d = arg_info.value.AsDouble();
938           v.SetIntValue((u.u >> (i * 8)) & 0xff);
939         } else {
940           union {
941             uint32_t u;
942             float f;
943           } u;
944           u.f = arg_info.value.AsFloat();
945           v.SetIntValue((u.u >> (i * 8)) & 0xff);
946         }
947       } else {
948         v.SetIntValue((arg_info.value.AsUint64() >> (i * 8)) & 0xff);
949       }
950       data_bytes.push_back(v);
951     }
952     Result r = buffer->SetDataWithOffset(data_bytes, offset);
953     if (!r.IsSuccess())
954       return r;
955   }
956 
957   return {};
958 }
959 
GenerateOpenCLLiteralSamplers()960 Result Pipeline::GenerateOpenCLLiteralSamplers() {
961   for (auto& info : samplers_) {
962     if (info.sampler || info.mask == std::numeric_limits<uint32_t>::max())
963       continue;
964 
965     auto literal_sampler = MakeUnique<Sampler>();
966     literal_sampler->SetName("literal." + std::to_string(info.descriptor_set) +
967                              "." + std::to_string(info.binding));
968 
969     // The values for addressing modes, filtering modes and coordinate
970     // normalization are all defined in the OpenCL header.
971 
972     literal_sampler->SetNormalizedCoords(info.mask &
973                                          kOpenCLNormalizedCoordsBit);
974 
975     uint32_t addressing_bits = info.mask & kOpenCLAddressModeBits;
976     AddressMode addressing_mode = AddressMode::kUnknown;
977     if (addressing_bits == kOpenCLAddressModeNone ||
978         addressing_bits == kOpenCLAddressModeClampToEdge) {
979       // CLK_ADDRESS_NONE
980       // CLK_ADDERSS_CLAMP_TO_EDGE
981       addressing_mode = AddressMode::kClampToEdge;
982     } else if (addressing_bits == kOpenCLAddressModeClamp) {
983       // CLK_ADDRESS_CLAMP
984       addressing_mode = AddressMode::kClampToBorder;
985     } else if (addressing_bits == kOpenCLAddressModeRepeat) {
986       // CLK_ADDRESS_REPEAT
987       addressing_mode = AddressMode::kRepeat;
988     } else if (addressing_bits == kOpenCLAddressModeMirroredRepeat) {
989       // CLK_ADDRESS_MIRRORED_REPEAT
990       addressing_mode = AddressMode::kMirroredRepeat;
991     }
992     literal_sampler->SetAddressModeU(addressing_mode);
993     literal_sampler->SetAddressModeV(addressing_mode);
994     // TODO(alan-baker): If this is used with an arrayed image then W should use
995     // kClampToEdge always, but this information is not currently available.
996     literal_sampler->SetAddressModeW(addressing_mode);
997 
998     // Next bit is filtering mode.
999     FilterType filtering_mode = FilterType::kUnknown;
1000     if (info.mask & kOpenCLFilterModeNearestBit) {
1001       filtering_mode = FilterType::kNearest;
1002     } else if (info.mask & kOpenCLFilterModeLinearBit) {
1003       filtering_mode = FilterType::kLinear;
1004     }
1005     literal_sampler->SetMagFilter(filtering_mode);
1006     literal_sampler->SetMinFilter(filtering_mode);
1007 
1008     // TODO(alan-baker): OpenCL wants the border color to be based on image
1009     // channel orders which aren't accessible.
1010 
1011     // clspv never generates multiple MIPMAP levels.
1012     literal_sampler->SetMinLOD(0.0f);
1013     literal_sampler->SetMaxLOD(0.0f);
1014 
1015     opencl_literal_samplers_.push_back(std::move(literal_sampler));
1016     info.sampler = opencl_literal_samplers_.back().get();
1017   }
1018 
1019   return {};
1020 }
1021 
GenerateOpenCLPushConstants()1022 Result Pipeline::GenerateOpenCLPushConstants() {
1023   if (!IsCompute() || GetShaders().empty() ||
1024       GetShaders()[0].GetShader()->GetFormat() != kShaderFormatOpenCLC) {
1025     return {};
1026   }
1027 
1028   const auto& shader_info = GetShaders()[0];
1029   if (shader_info.GetPushConstants().empty())
1030     return {};
1031 
1032   Result r = CreatePushConstantBuffer();
1033   if (!r.IsSuccess())
1034     return r;
1035 
1036   auto* buf = GetPushConstantBuffer().buffer;
1037   assert(buf);
1038 
1039   // Determine size and contents of the push constant buffer.
1040   for (const auto& pc : shader_info.GetPushConstants()) {
1041     assert(pc.size % sizeof(uint32_t) == 0);
1042     assert(pc.offset % sizeof(uint32_t) == 0);
1043 
1044     if (buf->GetSizeInBytes() < pc.offset + pc.size)
1045       buf->SetSizeInBytes(pc.offset + pc.size);
1046 
1047     std::vector<uint32_t> bytes(pc.size / sizeof(uint32_t));
1048     uint32_t base = 0;
1049     switch (pc.type) {
1050       case Pipeline::ShaderInfo::PushConstant::PushConstantType::kDimensions:
1051         // All compute kernel launches are 3D.
1052         bytes[base] = 3;
1053         break;
1054       case Pipeline::ShaderInfo::PushConstant::PushConstantType::kGlobalOffset:
1055         // Global offsets are not currently supported.
1056         bytes[base] = 0;
1057         bytes[base + 1] = 0;
1058         bytes[base + 2] = 0;
1059         break;
1060       case Pipeline::ShaderInfo::PushConstant::PushConstantType::kRegionOffset:
1061         // Region offsets are not currently supported.
1062         bytes[base] = 0;
1063         bytes[base + 1] = 0;
1064         bytes[base + 2] = 0;
1065         break;
1066     }
1067     memcpy(buf->ValuePtr()->data() + pc.offset, bytes.data(),
1068            bytes.size() * sizeof(uint32_t));
1069   }
1070 
1071   return {};
1072 }
1073 
1074 }  // namespace amber
1075