1 // Copyright 2018 The Amber Authors.
2 // Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 #include "src/pipeline.h"
17
18 #include <algorithm>
19 #include <cstring>
20 #include <limits>
21 #include <set>
22
23 #include "src/make_unique.h"
24 #include "src/type_parser.h"
25
26 namespace amber {
namespace {

// Formats given to the color and depth/stencil attachments that the pipeline
// generates on its own.
constexpr const char* kDefaultColorBufferFormat = "B8G8R8A8_UNORM";
constexpr const char* kDefaultDepthBufferFormat = "D32_SFLOAT_S8_UINT";

// Layout of an OpenCL literal sampler mask.
//
// Bit 0: coordinates mode.
constexpr uint32_t kOpenCLNormalizedCoordsBit = 1;
// Bits 1, 2 and 3: address mode.
constexpr uint32_t kOpenCLAddressModeBits = 0xe;
// Values the address mode bits can take.
constexpr uint32_t kOpenCLAddressModeNone = 0;
constexpr uint32_t kOpenCLAddressModeClampToEdge = 2;
constexpr uint32_t kOpenCLAddressModeClamp = 4;
constexpr uint32_t kOpenCLAddressModeRepeat = 6;
constexpr uint32_t kOpenCLAddressModeMirroredRepeat = 8;
// Filter mode bits.
constexpr uint32_t kOpenCLFilterModeNearestBit = 0x10;
constexpr uint32_t kOpenCLFilterModeLinearBit = 0x20;

}  // namespace
47
// Names assigned to the buffers this class creates itself: the default color
// attachment, the default depth/stencil attachment and the generated push
// constant buffer.
const char* Pipeline::kGeneratedColorBuffer = "framebuffer";
const char* Pipeline::kGeneratedDepthBuffer = "depth_buffer";
const char* Pipeline::kGeneratedPushConstantBuffer = "push_constant_buffer";
51
// Records |shader| as being used with stage |type|. The entry point starts as
// "main", the required subgroup size setting starts as kNotSet with size 0,
// and both the varying-subgroup-size and require-full-subgroups flags start
// cleared.
Pipeline::ShaderInfo::ShaderInfo(Shader* shader, ShaderType type)
    : shader_(shader),
      shader_type_(type),
      entry_point_("main"),
      required_subgroup_size_setting_(RequiredSubgroupSizeSetting::kNotSet),
      required_subgroup_size_(0),
      varying_subgroup_size_(false),
      require_full_subgroups_(false) {}
60
// Copying uses the compiler-generated member-wise copy.
Pipeline::ShaderInfo::ShaderInfo(const ShaderInfo&) = default;

// Destruction uses the compiler-generated destructor.
Pipeline::ShaderInfo::~ShaderInfo() = default;
64
// Creates an empty pipeline of the given |type|; the type is the only member
// initialized here.
Pipeline::Pipeline(PipelineType type) : pipeline_type_(type) {
}

// Destruction uses the compiler-generated destructor.
Pipeline::~Pipeline() = default;
69
Clone() const70 std::unique_ptr<Pipeline> Pipeline::Clone() const {
71 auto clone = MakeUnique<Pipeline>(pipeline_type_);
72 clone->shaders_ = shaders_;
73 clone->color_attachments_ = color_attachments_;
74 clone->vertex_buffers_ = vertex_buffers_;
75 clone->buffers_ = buffers_;
76 clone->depth_stencil_buffer_ = depth_stencil_buffer_;
77 clone->index_buffer_ = index_buffer_;
78 clone->fb_width_ = fb_width_;
79 clone->fb_height_ = fb_height_;
80 clone->set_arg_values_ = set_arg_values_;
81 clone->pipeline_data_ = pipeline_data_;
82
83 if (!opencl_pod_buffers_.empty()) {
84 // Generate specific buffers for the clone.
85 clone->GenerateOpenCLPodBuffers();
86 }
87
88 return clone;
89 }
90
AddShader(Shader * shader,ShaderType shader_type)91 Result Pipeline::AddShader(Shader* shader, ShaderType shader_type) {
92 if (!shader)
93 return Result("shader can not be null when attached to pipeline");
94
95 if (pipeline_type_ == PipelineType::kCompute &&
96 shader_type != kShaderTypeCompute) {
97 return Result("only compute shaders allowed in a compute pipeline");
98 }
99 if (pipeline_type_ == PipelineType::kGraphics &&
100 shader_type == kShaderTypeCompute) {
101 return Result("can not add a compute shader to a graphics pipeline");
102 }
103
104 if (pipeline_type_ != PipelineType::kRayTracing) {
105 for (auto& info : shaders_) {
106 const auto* is = info.GetShader();
107 if (is == shader)
108 return Result("can not add duplicate shader to pipeline");
109 if (is->GetType() == shader_type) {
110 info.SetShader(shader);
111 return {};
112 }
113 }
114 }
115
116 shaders_.emplace_back(shader, shader_type);
117 return {};
118 }
119
SetShaderOptimizations(const Shader * shader,const std::vector<std::string> & opts)120 Result Pipeline::SetShaderOptimizations(const Shader* shader,
121 const std::vector<std::string>& opts) {
122 if (!shader)
123 return Result("invalid shader specified for optimizations");
124
125 std::set<std::string> seen;
126 for (const auto& opt : opts) {
127 if (seen.count(opt) != 0)
128 return Result("duplicate optimization flag (" + opt + ") set on shader");
129
130 seen.insert(opt);
131 }
132
133 for (auto& info : shaders_) {
134 const auto* is = info.GetShader();
135 if (is == shader) {
136 info.SetShaderOptimizations(opts);
137 return {};
138 }
139 }
140
141 return Result("unknown shader specified for optimizations: " +
142 shader->GetName());
143 }
144
SetShaderCompileOptions(const Shader * shader,const std::vector<std::string> & opts)145 Result Pipeline::SetShaderCompileOptions(const Shader* shader,
146 const std::vector<std::string>& opts) {
147 if (!shader)
148 return Result("invalid shader specified for compile options");
149
150 for (auto& info : shaders_) {
151 const auto* is = info.GetShader();
152 if (is == shader) {
153 info.SetCompileOptions(opts);
154 return {};
155 }
156 }
157
158 return Result("unknown shader specified for compile options: " +
159 shader->GetName());
160 }
161
SetShaderRequiredSubgroupSize(const Shader * shader,const ShaderInfo::RequiredSubgroupSizeSetting setting,const uint32_t size)162 Result Pipeline::SetShaderRequiredSubgroupSize(
163 const Shader* shader,
164 const ShaderInfo::RequiredSubgroupSizeSetting setting,
165 const uint32_t size) {
166 if (!shader)
167 return Result("invalid shader specified for required subgroup size");
168
169 for (auto& info : shaders_) {
170 const auto* is = info.GetShader();
171 if (is == shader) {
172 info.SetRequiredSubgroupSizeSetting(setting, size);
173 return {};
174 }
175 }
176
177 return Result("unknown shader specified for required subgroup size: " +
178 shader->GetName());
179 }
180
SetShaderRequiredSubgroupSize(const Shader * shader,const uint32_t subgroupSize)181 Result Pipeline::SetShaderRequiredSubgroupSize(const Shader* shader,
182 const uint32_t subgroupSize) {
183 const bool isPow2 =
184 subgroupSize > 0 && (subgroupSize & (subgroupSize - 1)) == 0;
185 if (subgroupSize == 0 || subgroupSize > 128 || !isPow2) {
186 return Result("invalid required subgroup size " +
187 std::to_string(subgroupSize) + " specified for shader name " +
188 shader->GetName());
189 }
190 const ShaderInfo::RequiredSubgroupSizeSetting setting =
191 ShaderInfo::RequiredSubgroupSizeSetting::kSetToSpecificSize;
192 return SetShaderRequiredSubgroupSize(shader, setting, subgroupSize);
193 }
194
SetShaderRequiredSubgroupSizeToMinimum(const Shader * shader)195 Result Pipeline::SetShaderRequiredSubgroupSizeToMinimum(const Shader* shader) {
196 const ShaderInfo::RequiredSubgroupSizeSetting subgroupSizeSetting =
197 ShaderInfo::RequiredSubgroupSizeSetting::kSetToMinimumSize;
198 return SetShaderRequiredSubgroupSize(shader, subgroupSizeSetting, 0);
199 }
200
SetShaderRequiredSubgroupSizeToMaximum(const Shader * shader)201 Result Pipeline::SetShaderRequiredSubgroupSizeToMaximum(const Shader* shader) {
202 const ShaderInfo::RequiredSubgroupSizeSetting subgroupSizeSetting =
203 ShaderInfo::RequiredSubgroupSizeSetting::kSetToMaximumSize;
204 return SetShaderRequiredSubgroupSize(shader, subgroupSizeSetting, 0);
205 }
206
SetShaderVaryingSubgroupSize(const Shader * shader,const bool isSet)207 Result Pipeline::SetShaderVaryingSubgroupSize(const Shader* shader,
208 const bool isSet) {
209 if (!shader)
210 return Result("invalid shader specified for varying subgroup size");
211
212 for (auto& info : shaders_) {
213 const auto* is = info.GetShader();
214 if (is == shader) {
215 info.SetVaryingSubgroupSize(isSet);
216 return {};
217 }
218 }
219
220 return Result("unknown shader specified for varying subgroup size: " +
221 shader->GetName());
222 }
223
SetShaderRequireFullSubgroups(const Shader * shader,const bool isSet)224 Result Pipeline::SetShaderRequireFullSubgroups(const Shader* shader,
225 const bool isSet) {
226 if (!shader)
227 return Result("invalid shader specified for optimizations");
228
229 for (auto& info : shaders_) {
230 const auto* is = info.GetShader();
231 if (is == shader) {
232 info.SetRequireFullSubgroups(isSet);
233 return {};
234 }
235 }
236
237 return Result("unknown shader specified for optimizations: " +
238 shader->GetName());
239 }
240
SetShaderEntryPoint(const Shader * shader,const std::string & name)241 Result Pipeline::SetShaderEntryPoint(const Shader* shader,
242 const std::string& name) {
243 if (!shader)
244 return Result("invalid shader specified for entry point");
245 if (name.empty())
246 return Result("entry point should not be blank");
247
248 for (auto& info : shaders_) {
249 if (info.GetShader() == shader) {
250 if (info.GetEntryPoint() != "main")
251 return Result("multiple entry points given for the same shader");
252
253 info.SetEntryPoint(name);
254 return {};
255 }
256 }
257
258 return Result("unknown shader specified for entry point: " +
259 shader->GetName());
260 }
261
SetShaderType(const Shader * shader,ShaderType type)262 Result Pipeline::SetShaderType(const Shader* shader, ShaderType type) {
263 if (!shader)
264 return Result("invalid shader specified for shader type");
265
266 for (auto& info : shaders_) {
267 if (info.GetShader() == shader) {
268 info.SetShaderType(type);
269 return {};
270 }
271 }
272
273 return Result("unknown shader specified for shader type: " +
274 shader->GetName());
275 }
276
Validate() const277 Result Pipeline::Validate() const {
278 for (const auto& attachment : color_attachments_) {
279 if (attachment.buffer->ElementCount() !=
280 (fb_width_ << attachment.base_mip_level) *
281 (fb_height_ << attachment.base_mip_level)) {
282 return Result(
283 "shared framebuffer must have same size over all PIPELINES");
284 }
285 }
286
287 if (depth_stencil_buffer_.buffer &&
288 depth_stencil_buffer_.buffer->ElementCount() != fb_width_ * fb_height_) {
289 return Result("shared depth buffer must have same size over all PIPELINES");
290 }
291
292 for (auto& buf : GetBuffers()) {
293 if (buf.buffer->GetFormat() == nullptr) {
294 return Result("buffer (" + std::to_string(buf.descriptor_set) + ":" +
295 std::to_string(buf.binding) + ") requires a format");
296 }
297 }
298
299 if (pipeline_type_ == PipelineType::kRayTracing)
300 return ValidateRayTracing();
301 else if (pipeline_type_ == PipelineType::kGraphics)
302 return ValidateGraphics();
303
304 return ValidateCompute();
305 }
306
ValidateRayTracing() const307 Result Pipeline::ValidateRayTracing() const {
308 if (shader_groups_.empty() && shaders_.empty() && tlases_.empty())
309 return Result("Shader groups are missing");
310
311 return {};
312 }
313
ValidateGraphics() const314 Result Pipeline::ValidateGraphics() const {
315 if (color_attachments_.empty())
316 return Result("PIPELINE missing color attachment");
317
318 bool found_vertex = false;
319 for (const auto& info : shaders_) {
320 const auto* s = info.GetShader();
321 if (s->GetType() == kShaderTypeVertex) {
322 found_vertex = true;
323 break;
324 }
325 }
326
327 if (!found_vertex)
328 return Result("graphics pipeline requires a vertex shader");
329
330 for (const auto& att : color_attachments_) {
331 auto width = att.buffer->GetWidth();
332 auto height = att.buffer->GetHeight();
333 for (uint32_t level = 1; level < att.buffer->GetMipLevels(); level++) {
334 width >>= 1;
335 if (width == 0)
336 return Result("color attachment with " +
337 std::to_string(att.buffer->GetMipLevels()) +
338 " mip levels would have zero width for level " +
339 std::to_string(level));
340 height >>= 1;
341 if (height == 0)
342 return Result("color attachment with " +
343 std::to_string(att.buffer->GetMipLevels()) +
344 " mip levels would have zero height for level " +
345 std::to_string(level));
346 }
347 }
348
349 return {};
350 }
351
ValidateCompute() const352 Result Pipeline::ValidateCompute() const {
353 if (shaders_.empty())
354 return Result("compute pipeline requires a compute shader");
355
356 return {};
357 }
358
UpdateFramebufferSizes()359 void Pipeline::UpdateFramebufferSizes() {
360 uint32_t size = fb_width_ * fb_height_;
361 if (size == 0)
362 return;
363
364 for (auto& attachment : color_attachments_) {
365 auto mip0_width = fb_width_ << attachment.base_mip_level;
366 auto mip0_height = fb_height_ << attachment.base_mip_level;
367 attachment.buffer->SetWidth(mip0_width);
368 attachment.buffer->SetHeight(mip0_height);
369 attachment.buffer->SetElementCount(mip0_width * mip0_height);
370 }
371
372 if (depth_stencil_buffer_.buffer) {
373 depth_stencil_buffer_.buffer->SetWidth(fb_width_);
374 depth_stencil_buffer_.buffer->SetHeight(fb_height_);
375 depth_stencil_buffer_.buffer->SetElementCount(size);
376 }
377 }
378
AddColorAttachment(Buffer * buf,uint32_t location,uint32_t base_mip_level)379 Result Pipeline::AddColorAttachment(Buffer* buf,
380 uint32_t location,
381 uint32_t base_mip_level) {
382 for (const auto& attachment : color_attachments_) {
383 if (attachment.location == location)
384 return Result("can not bind two color buffers to the same LOCATION");
385 if (attachment.buffer == buf)
386 return Result("color buffer may only be bound to a PIPELINE once");
387 }
388
389 color_attachments_.push_back(BufferInfo{buf});
390
391 auto& info = color_attachments_.back();
392 info.location = location;
393 info.type = BufferType::kColor;
394 info.base_mip_level = base_mip_level;
395 auto mip0_width = fb_width_ << base_mip_level;
396 auto mip0_height = fb_height_ << base_mip_level;
397 buf->SetWidth(mip0_width);
398 buf->SetHeight(mip0_height);
399 buf->SetElementCount(mip0_width * mip0_height);
400
401 return {};
402 }
403
AddResolveTarget(Buffer * buf)404 Result Pipeline::AddResolveTarget(Buffer* buf) {
405 resolve_targets_.push_back(BufferInfo{buf});
406
407 auto& info = resolve_targets_.back();
408 info.type = BufferType::kResolve;
409 buf->SetWidth(fb_width_);
410 buf->SetHeight(fb_height_);
411 buf->SetElementCount(fb_width_ * fb_height_);
412
413 return {};
414 }
415
GetLocationForColorAttachment(Buffer * buf,uint32_t * loc) const416 Result Pipeline::GetLocationForColorAttachment(Buffer* buf,
417 uint32_t* loc) const {
418 for (const auto& info : color_attachments_) {
419 if (info.buffer == buf) {
420 *loc = info.location;
421 return {};
422 }
423 }
424 return Result("Unable to find requested buffer");
425 }
426
SetDepthStencilBuffer(Buffer * buf)427 Result Pipeline::SetDepthStencilBuffer(Buffer* buf) {
428 if (depth_stencil_buffer_.buffer != nullptr)
429 return Result("can only bind one depth/stencil buffer in a PIPELINE");
430
431 depth_stencil_buffer_.buffer = buf;
432 depth_stencil_buffer_.type = BufferType::kDepthStencil;
433
434 buf->SetWidth(fb_width_);
435 buf->SetHeight(fb_height_);
436 buf->SetElementCount(fb_width_ * fb_height_);
437 return {};
438 }
439
SetIndexBuffer(Buffer * buf)440 Result Pipeline::SetIndexBuffer(Buffer* buf) {
441 if (index_buffer_ != nullptr)
442 return Result("can only bind one INDEX_DATA buffer in a pipeline");
443
444 index_buffer_ = buf;
445 return {};
446 }
447
AddVertexBuffer(Buffer * buf,uint32_t location,InputRate rate,Format * format,uint32_t offset,uint32_t stride)448 Result Pipeline::AddVertexBuffer(Buffer* buf,
449 uint32_t location,
450 InputRate rate,
451 Format* format,
452 uint32_t offset,
453 uint32_t stride) {
454 for (const auto& vtex : vertex_buffers_) {
455 if (vtex.location == location)
456 return Result("can not bind two vertex buffers to the same LOCATION");
457 }
458
459 vertex_buffers_.push_back(BufferInfo{buf});
460 vertex_buffers_.back().location = location;
461 vertex_buffers_.back().type = BufferType::kVertex;
462 vertex_buffers_.back().input_rate = rate;
463 vertex_buffers_.back().format = format;
464 vertex_buffers_.back().offset = offset;
465 vertex_buffers_.back().stride = stride;
466 return {};
467 }
468
SetPushConstantBuffer(Buffer * buf)469 Result Pipeline::SetPushConstantBuffer(Buffer* buf) {
470 if (push_constant_buffer_.buffer != nullptr)
471 return Result("can only bind one push constant buffer in a PIPELINE");
472
473 push_constant_buffer_.buffer = buf;
474 push_constant_buffer_.type = BufferType::kPushConstant;
475 return {};
476 }
477
CreatePushConstantBuffer()478 Result Pipeline::CreatePushConstantBuffer() {
479 if (push_constant_buffer_.buffer != nullptr)
480 return Result("can only bind one push constant buffer in a PIPELINE");
481
482 TypeParser parser;
483 auto type = parser.Parse("R8_UINT");
484 auto fmt = MakeUnique<Format>(type.get());
485
486 std::unique_ptr<Buffer> buf = MakeUnique<Buffer>();
487 buf->SetName(kGeneratedPushConstantBuffer);
488 buf->SetFormat(fmt.get());
489
490 push_constant_buffer_.buffer = buf.get();
491 push_constant_buffer_.type = BufferType::kPushConstant;
492
493 formats_.push_back(std::move(fmt));
494 types_.push_back(std::move(type));
495 opencl_push_constants_ = std::move(buf);
496
497 return {};
498 }
499
GenerateDefaultColorAttachmentBuffer()500 std::unique_ptr<Buffer> Pipeline::GenerateDefaultColorAttachmentBuffer() {
501 TypeParser parser;
502 auto type = parser.Parse(kDefaultColorBufferFormat);
503 auto fmt = MakeUnique<Format>(type.get());
504
505 std::unique_ptr<Buffer> buf = MakeUnique<Buffer>();
506 buf->SetName(kGeneratedColorBuffer);
507 buf->SetFormat(fmt.get());
508
509 formats_.push_back(std::move(fmt));
510 types_.push_back(std::move(type));
511 return buf;
512 }
513
514 std::unique_ptr<Buffer>
GenerateDefaultDepthStencilAttachmentBuffer()515 Pipeline::GenerateDefaultDepthStencilAttachmentBuffer() {
516 TypeParser parser;
517 auto type = parser.Parse(kDefaultDepthBufferFormat);
518 auto fmt = MakeUnique<Format>(type.get());
519
520 std::unique_ptr<Buffer> buf = MakeUnique<Buffer>();
521 buf->SetName(kGeneratedDepthBuffer);
522 buf->SetFormat(fmt.get());
523
524 formats_.push_back(std::move(fmt));
525 types_.push_back(std::move(type));
526 return buf;
527 }
528
GetBufferForBinding(uint32_t descriptor_set,uint32_t binding) const529 Buffer* Pipeline::GetBufferForBinding(uint32_t descriptor_set,
530 uint32_t binding) const {
531 for (const auto& info : buffers_) {
532 if (info.descriptor_set == descriptor_set && info.binding == binding)
533 return info.buffer;
534 }
535 return nullptr;
536 }
537
AddBuffer(Buffer * buf,BufferType type,uint32_t descriptor_set,uint32_t binding,uint32_t base_mip_level,uint32_t dynamic_offset,uint64_t descriptor_offset,uint64_t descriptor_range)538 void Pipeline::AddBuffer(Buffer* buf,
539 BufferType type,
540 uint32_t descriptor_set,
541 uint32_t binding,
542 uint32_t base_mip_level,
543 uint32_t dynamic_offset,
544 uint64_t descriptor_offset,
545 uint64_t descriptor_range) {
546 buffers_.push_back(BufferInfo{buf});
547
548 auto& info = buffers_.back();
549 info.descriptor_set = descriptor_set;
550 info.binding = binding;
551 info.type = type;
552 info.base_mip_level = base_mip_level;
553 info.dynamic_offset = dynamic_offset;
554 info.sampler = buf->GetSampler();
555 info.descriptor_offset = descriptor_offset;
556 info.descriptor_range = descriptor_range;
557 }
558
AddBuffer(Buffer * buf,BufferType type,const std::string & arg_name)559 void Pipeline::AddBuffer(Buffer* buf,
560 BufferType type,
561 const std::string& arg_name) {
562 // If this buffer binding already exists, overwrite with the new buffer.
563 for (auto& info : buffers_) {
564 if (info.arg_name == arg_name) {
565 info.buffer = buf;
566 return;
567 }
568 }
569
570 buffers_.push_back(BufferInfo{buf});
571
572 auto& info = buffers_.back();
573 info.type = type;
574 info.arg_name = arg_name;
575 info.descriptor_set = std::numeric_limits<uint32_t>::max();
576 info.binding = std::numeric_limits<uint32_t>::max();
577 info.arg_no = std::numeric_limits<uint32_t>::max();
578 info.base_mip_level = 0;
579 info.dynamic_offset = 0;
580 }
581
AddBuffer(Buffer * buf,BufferType type,uint32_t arg_no)582 void Pipeline::AddBuffer(Buffer* buf, BufferType type, uint32_t arg_no) {
583 // If this buffer binding already exists, overwrite with the new buffer.
584 for (auto& info : buffers_) {
585 if (info.arg_no == arg_no) {
586 info.buffer = buf;
587 return;
588 }
589 }
590
591 buffers_.push_back(BufferInfo{buf});
592
593 auto& info = buffers_.back();
594 info.type = type;
595 info.arg_no = arg_no;
596 info.descriptor_set = std::numeric_limits<uint32_t>::max();
597 info.binding = std::numeric_limits<uint32_t>::max();
598 info.base_mip_level = 0;
599 info.dynamic_offset = 0;
600 }
601
ClearBuffers(uint32_t descriptor_set,uint32_t binding)602 void Pipeline::ClearBuffers(uint32_t descriptor_set, uint32_t binding) {
603 buffers_.erase(
604 std::remove_if(buffers_.begin(), buffers_.end(),
605 [descriptor_set, binding](BufferInfo& info) -> bool {
606 return (info.descriptor_set == descriptor_set &&
607 info.binding == binding);
608 }),
609 buffers_.end());
610 }
611
AddSampler(Sampler * sampler,uint32_t descriptor_set,uint32_t binding)612 void Pipeline::AddSampler(Sampler* sampler,
613 uint32_t descriptor_set,
614 uint32_t binding) {
615 samplers_.push_back(SamplerInfo{sampler});
616
617 auto& info = samplers_.back();
618 info.descriptor_set = descriptor_set;
619 info.binding = binding;
620 info.mask = std::numeric_limits<uint32_t>::max();
621 }
622
AddSampler(Sampler * sampler,const std::string & arg_name)623 void Pipeline::AddSampler(Sampler* sampler, const std::string& arg_name) {
624 for (auto& info : samplers_) {
625 if (info.arg_name == arg_name) {
626 info.sampler = sampler;
627 return;
628 }
629 }
630
631 samplers_.push_back(SamplerInfo{sampler});
632
633 auto& info = samplers_.back();
634 info.arg_name = arg_name;
635 info.descriptor_set = std::numeric_limits<uint32_t>::max();
636 info.binding = std::numeric_limits<uint32_t>::max();
637 info.arg_no = std::numeric_limits<uint32_t>::max();
638 info.mask = std::numeric_limits<uint32_t>::max();
639 }
640
AddSampler(Sampler * sampler,uint32_t arg_no)641 void Pipeline::AddSampler(Sampler* sampler, uint32_t arg_no) {
642 for (auto& info : samplers_) {
643 if (info.arg_no == arg_no) {
644 info.sampler = sampler;
645 return;
646 }
647 }
648
649 samplers_.push_back(SamplerInfo{sampler});
650
651 auto& info = samplers_.back();
652 info.arg_no = arg_no;
653 info.descriptor_set = std::numeric_limits<uint32_t>::max();
654 info.binding = std::numeric_limits<uint32_t>::max();
655 info.mask = std::numeric_limits<uint32_t>::max();
656 }
657
AddSampler(uint32_t mask,uint32_t descriptor_set,uint32_t binding)658 void Pipeline::AddSampler(uint32_t mask,
659 uint32_t descriptor_set,
660 uint32_t binding) {
661 samplers_.push_back(SamplerInfo{nullptr});
662
663 auto& info = samplers_.back();
664 info.arg_name = "";
665 info.arg_no = std::numeric_limits<uint32_t>::max();
666 info.mask = mask;
667 info.descriptor_set = descriptor_set;
668 info.binding = binding;
669 }
670
AddTLAS(TLAS * tlas,uint32_t descriptor_set,uint32_t binding)671 void Pipeline::AddTLAS(TLAS* tlas, uint32_t descriptor_set, uint32_t binding) {
672 tlases_.push_back(TLASInfo(tlas));
673
674 auto& info = tlases_.back();
675
676 info.descriptor_set = descriptor_set;
677 info.binding = binding;
678 }
679
ClearSamplers(uint32_t descriptor_set,uint32_t binding)680 void Pipeline::ClearSamplers(uint32_t descriptor_set, uint32_t binding) {
681 samplers_.erase(
682 std::remove_if(samplers_.begin(), samplers_.end(),
683 [descriptor_set, binding](SamplerInfo& info) -> bool {
684 return (info.descriptor_set == descriptor_set &&
685 info.binding == binding);
686 }),
687 samplers_.end());
688 }
689
UpdateOpenCLBufferBindings()690 Result Pipeline::UpdateOpenCLBufferBindings() {
691 if (!IsCompute() || GetShaders().empty() ||
692 GetShaders()[0].GetShader()->GetFormat() != kShaderFormatOpenCLC) {
693 return {};
694 }
695
696 const auto& shader_info = GetShaders()[0];
697 const auto& descriptor_map = shader_info.GetDescriptorMap();
698 if (descriptor_map.empty())
699 return {};
700
701 const auto iter = descriptor_map.find(shader_info.GetEntryPoint());
702 if (iter == descriptor_map.end())
703 return {};
704
705 for (auto& info : samplers_) {
706 if (info.descriptor_set == std::numeric_limits<uint32_t>::max() &&
707 info.binding == std::numeric_limits<uint32_t>::max()) {
708 for (const auto& entry : iter->second) {
709 if (entry.arg_name == info.arg_name ||
710 entry.arg_ordinal == info.arg_no) {
711 if (entry.kind !=
712 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::SAMPLER) {
713 return Result("Sampler bound to non-sampler kernel arg");
714 }
715 info.descriptor_set = entry.descriptor_set;
716 info.binding = entry.binding;
717 }
718 }
719 }
720 }
721
722 for (auto& info : buffers_) {
723 if (info.descriptor_set == std::numeric_limits<uint32_t>::max() &&
724 info.binding == std::numeric_limits<uint32_t>::max()) {
725 for (const auto& entry : iter->second) {
726 if (entry.arg_name == info.arg_name ||
727 entry.arg_ordinal == info.arg_no) {
728 // Buffer storage class consistency checks.
729 if (info.type == BufferType::kUnknown) {
730 // Set the appropriate buffer type.
731 switch (entry.kind) {
732 case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::UBO:
733 case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD_UBO:
734 info.type = BufferType::kUniform;
735 break;
736 case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::SSBO:
737 case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD:
738 info.type = BufferType::kStorage;
739 break;
740 case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::RO_IMAGE:
741 info.type = BufferType::kSampledImage;
742 break;
743 case Pipeline::ShaderInfo::DescriptorMapEntry::Kind::WO_IMAGE:
744 info.type = BufferType::kStorageImage;
745 break;
746 default:
747 return Result("Unhandled buffer type for OPENCL-C shader");
748 }
749 } else if (info.type == BufferType::kUniform) {
750 if (entry.kind !=
751 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::UBO &&
752 entry.kind !=
753 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD_UBO) {
754 return Result("Buffer " + info.buffer->GetName() +
755 " must be a uniform binding");
756 }
757 } else if (info.type == BufferType::kStorage) {
758 if (entry.kind !=
759 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::SSBO &&
760 entry.kind !=
761 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD) {
762 return Result("Buffer " + info.buffer->GetName() +
763 " must be a storage binding");
764 }
765 } else if (info.type == BufferType::kSampledImage) {
766 if (entry.kind !=
767 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::RO_IMAGE) {
768 return Result("Buffer " + info.buffer->GetName() +
769 " must be a read-only image binding");
770 }
771 } else if (info.type == BufferType::kStorageImage) {
772 if (entry.kind !=
773 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::WO_IMAGE) {
774 return Result("Buffer " + info.buffer->GetName() +
775 " must be a write-only image binding");
776 }
777 } else {
778 return Result("Unhandled buffer type for OPENCL-C shader");
779 }
780 info.descriptor_set = entry.descriptor_set;
781 info.binding = entry.binding;
782 }
783 }
784 }
785 }
786
787 return {};
788 }
789
GenerateOpenCLPodBuffers()790 Result Pipeline::GenerateOpenCLPodBuffers() {
791 if (!IsCompute() || GetShaders().empty() ||
792 GetShaders()[0].GetShader()->GetFormat() != kShaderFormatOpenCLC) {
793 return {};
794 }
795
796 const auto& shader_info = GetShaders()[0];
797 const auto& descriptor_map = shader_info.GetDescriptorMap();
798 if (descriptor_map.empty())
799 return {};
800
801 const auto iter = descriptor_map.find(shader_info.GetEntryPoint());
802 if (iter == descriptor_map.end())
803 return {};
804
805 // For each SET command, do the following:
806 // 1. Find the descriptor map entry for that argument.
807 // 2. Find or create the buffer for the descriptor set and binding pair.
808 // 3. Write the data for the SET command at the right offset.
809 for (const auto& arg_info : SetArgValues()) {
810 uint32_t descriptor_set = std::numeric_limits<uint32_t>::max();
811 uint32_t binding = std::numeric_limits<uint32_t>::max();
812 uint32_t offset = 0;
813 uint32_t arg_size = 0;
814 bool uses_name = !arg_info.name.empty();
815 Pipeline::ShaderInfo::DescriptorMapEntry::Kind kind =
816 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD;
817 for (const auto& entry : iter->second) {
818 if (entry.kind != Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD &&
819 entry.kind !=
820 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD_UBO &&
821 entry.kind != Pipeline::ShaderInfo::DescriptorMapEntry::Kind::
822 POD_PUSHCONSTANT) {
823 continue;
824 }
825
826 // Found the right entry.
827 if ((uses_name && entry.arg_name == arg_info.name) ||
828 entry.arg_ordinal == arg_info.ordinal) {
829 descriptor_set = entry.descriptor_set;
830 binding = entry.binding;
831 offset = entry.pod_offset;
832 arg_size = entry.pod_arg_size;
833 kind = entry.kind;
834 break;
835 }
836 }
837
838 Buffer* buffer = nullptr;
839 if (kind ==
840 Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD_PUSHCONSTANT) {
841 if (GetPushConstantBuffer().buffer == nullptr) {
842 auto r = CreatePushConstantBuffer();
843 if (!r.IsSuccess())
844 return r;
845 }
846 buffer = GetPushConstantBuffer().buffer;
847 } else {
848 if (descriptor_set == std::numeric_limits<uint32_t>::max() ||
849 binding == std::numeric_limits<uint32_t>::max()) {
850 std::string message =
851 "could not find descriptor map entry for SET command: kernel " +
852 shader_info.GetEntryPoint();
853 if (uses_name) {
854 message += ", name " + arg_info.name;
855 } else {
856 message += ", number " + std::to_string(arg_info.ordinal);
857 }
858 return Result(message);
859 }
860
861 auto buf_iter = opencl_pod_buffer_map_.lower_bound(
862 std::make_pair(descriptor_set, binding));
863 if (buf_iter == opencl_pod_buffer_map_.end() ||
864 buf_iter->first.first != descriptor_set ||
865 buf_iter->first.second != binding) {
866 // Ensure no buffer was previously bound for this descriptor set and
867 // binding pair.
868 for (const auto& buf_info : GetBuffers()) {
869 if (buf_info.descriptor_set == descriptor_set &&
870 buf_info.binding == binding) {
871 return Result("previously bound buffer " +
872 buf_info.buffer->GetName() +
873 " to PoD args at descriptor set " +
874 std::to_string(descriptor_set) + " binding " +
875 std::to_string(binding));
876 }
877 }
878
879 // Add a new buffer for this descriptor set and binding.
880 opencl_pod_buffers_.push_back(MakeUnique<Buffer>());
881 buffer = opencl_pod_buffers_.back().get();
882 auto buffer_type =
883 kind == Pipeline::ShaderInfo::DescriptorMapEntry::Kind::POD
884 ? BufferType::kStorage
885 : BufferType::kUniform;
886
887 // Use an 8-bit type because all the data in the descriptor map is
888 // byte-based and it simplifies the logic for sizing below.
889 TypeParser parser;
890 auto type = parser.Parse("R8_UINT");
891 auto fmt = MakeUnique<Format>(type.get());
892 buffer->SetFormat(fmt.get());
893 formats_.push_back(std::move(fmt));
894 types_.push_back(std::move(type));
895
896 buffer->SetName(GetName() + "_pod_buffer_" +
897 std::to_string(descriptor_set) + "_" +
898 std::to_string(binding));
899 opencl_pod_buffer_map_.insert(
900 buf_iter,
901 std::make_pair(std::make_pair(descriptor_set, binding), buffer));
902 AddBuffer(buffer, buffer_type, descriptor_set, binding, 0, 0, 0, ~0ULL);
903 } else {
904 buffer = buf_iter->second;
905 }
906
907 // Resize if necessary.
908 if (buffer->ValueCount() < offset + arg_size) {
909 buffer->SetSizeInElements(offset + arg_size);
910 }
911
912 // Check the data size.
913 if (arg_size != arg_info.fmt->SizeInBytes()) {
914 std::string message = "SET command uses incorrect data size: kernel " +
915 shader_info.GetEntryPoint();
916 if (uses_name) {
917 message += ", name " + arg_info.name;
918 } else {
919 message += ", number " + std::to_string(arg_info.ordinal);
920 }
921 return Result(message);
922 }
923 }
924
925 // Convert the argument value into bytes. Currently, only scalar arguments
926 // are supported.
927 const auto arg_byte_size = arg_info.fmt->SizeInBytes();
928 std::vector<Value> data_bytes;
929 for (uint32_t i = 0; i < arg_byte_size; ++i) {
930 Value v;
931 if (arg_info.value.IsFloat()) {
932 if (arg_byte_size == sizeof(double)) {
933 union {
934 uint64_t u;
935 double d;
936 } u;
937 u.d = arg_info.value.AsDouble();
938 v.SetIntValue((u.u >> (i * 8)) & 0xff);
939 } else {
940 union {
941 uint32_t u;
942 float f;
943 } u;
944 u.f = arg_info.value.AsFloat();
945 v.SetIntValue((u.u >> (i * 8)) & 0xff);
946 }
947 } else {
948 v.SetIntValue((arg_info.value.AsUint64() >> (i * 8)) & 0xff);
949 }
950 data_bytes.push_back(v);
951 }
952 Result r = buffer->SetDataWithOffset(data_bytes, offset);
953 if (!r.IsSuccess())
954 return r;
955 }
956
957 return {};
958 }
959
GenerateOpenCLLiteralSamplers()960 Result Pipeline::GenerateOpenCLLiteralSamplers() {
961 for (auto& info : samplers_) {
962 if (info.sampler || info.mask == std::numeric_limits<uint32_t>::max())
963 continue;
964
965 auto literal_sampler = MakeUnique<Sampler>();
966 literal_sampler->SetName("literal." + std::to_string(info.descriptor_set) +
967 "." + std::to_string(info.binding));
968
969 // The values for addressing modes, filtering modes and coordinate
970 // normalization are all defined in the OpenCL header.
971
972 literal_sampler->SetNormalizedCoords(info.mask &
973 kOpenCLNormalizedCoordsBit);
974
975 uint32_t addressing_bits = info.mask & kOpenCLAddressModeBits;
976 AddressMode addressing_mode = AddressMode::kUnknown;
977 if (addressing_bits == kOpenCLAddressModeNone ||
978 addressing_bits == kOpenCLAddressModeClampToEdge) {
979 // CLK_ADDRESS_NONE
980 // CLK_ADDERSS_CLAMP_TO_EDGE
981 addressing_mode = AddressMode::kClampToEdge;
982 } else if (addressing_bits == kOpenCLAddressModeClamp) {
983 // CLK_ADDRESS_CLAMP
984 addressing_mode = AddressMode::kClampToBorder;
985 } else if (addressing_bits == kOpenCLAddressModeRepeat) {
986 // CLK_ADDRESS_REPEAT
987 addressing_mode = AddressMode::kRepeat;
988 } else if (addressing_bits == kOpenCLAddressModeMirroredRepeat) {
989 // CLK_ADDRESS_MIRRORED_REPEAT
990 addressing_mode = AddressMode::kMirroredRepeat;
991 }
992 literal_sampler->SetAddressModeU(addressing_mode);
993 literal_sampler->SetAddressModeV(addressing_mode);
994 // TODO(alan-baker): If this is used with an arrayed image then W should use
995 // kClampToEdge always, but this information is not currently available.
996 literal_sampler->SetAddressModeW(addressing_mode);
997
998 // Next bit is filtering mode.
999 FilterType filtering_mode = FilterType::kUnknown;
1000 if (info.mask & kOpenCLFilterModeNearestBit) {
1001 filtering_mode = FilterType::kNearest;
1002 } else if (info.mask & kOpenCLFilterModeLinearBit) {
1003 filtering_mode = FilterType::kLinear;
1004 }
1005 literal_sampler->SetMagFilter(filtering_mode);
1006 literal_sampler->SetMinFilter(filtering_mode);
1007
1008 // TODO(alan-baker): OpenCL wants the border color to be based on image
1009 // channel orders which aren't accessible.
1010
1011 // clspv never generates multiple MIPMAP levels.
1012 literal_sampler->SetMinLOD(0.0f);
1013 literal_sampler->SetMaxLOD(0.0f);
1014
1015 opencl_literal_samplers_.push_back(std::move(literal_sampler));
1016 info.sampler = opencl_literal_samplers_.back().get();
1017 }
1018
1019 return {};
1020 }
1021
GenerateOpenCLPushConstants()1022 Result Pipeline::GenerateOpenCLPushConstants() {
1023 if (!IsCompute() || GetShaders().empty() ||
1024 GetShaders()[0].GetShader()->GetFormat() != kShaderFormatOpenCLC) {
1025 return {};
1026 }
1027
1028 const auto& shader_info = GetShaders()[0];
1029 if (shader_info.GetPushConstants().empty())
1030 return {};
1031
1032 Result r = CreatePushConstantBuffer();
1033 if (!r.IsSuccess())
1034 return r;
1035
1036 auto* buf = GetPushConstantBuffer().buffer;
1037 assert(buf);
1038
1039 // Determine size and contents of the push constant buffer.
1040 for (const auto& pc : shader_info.GetPushConstants()) {
1041 assert(pc.size % sizeof(uint32_t) == 0);
1042 assert(pc.offset % sizeof(uint32_t) == 0);
1043
1044 if (buf->GetSizeInBytes() < pc.offset + pc.size)
1045 buf->SetSizeInBytes(pc.offset + pc.size);
1046
1047 std::vector<uint32_t> bytes(pc.size / sizeof(uint32_t));
1048 uint32_t base = 0;
1049 switch (pc.type) {
1050 case Pipeline::ShaderInfo::PushConstant::PushConstantType::kDimensions:
1051 // All compute kernel launches are 3D.
1052 bytes[base] = 3;
1053 break;
1054 case Pipeline::ShaderInfo::PushConstant::PushConstantType::kGlobalOffset:
1055 // Global offsets are not currently supported.
1056 bytes[base] = 0;
1057 bytes[base + 1] = 0;
1058 bytes[base + 2] = 0;
1059 break;
1060 case Pipeline::ShaderInfo::PushConstant::PushConstantType::kRegionOffset:
1061 // Region offsets are not currently supported.
1062 bytes[base] = 0;
1063 bytes[base + 1] = 0;
1064 bytes[base + 2] = 0;
1065 break;
1066 }
1067 memcpy(buf->ValuePtr()->data() + pc.offset, bytes.data(),
1068 bytes.size() * sizeof(uint32_t));
1069 }
1070
1071 return {};
1072 }
1073
1074 } // namespace amber
1075