#include <ATen/ArrayRef.h>
#include <ATen/CPUGeneratorImpl.h>
#include <ATen/native/vulkan/ops/Common.h>
#include <ATen/native/vulkan/ops/QuantizedFunctions.h>
#include <torch/library.h>
#include <vector>

namespace at {
namespace native {
namespace vulkan {
namespace ops {

using namespace api::utils;

#ifdef USE_VULKAN_API

static Tensor& uniform_(
    Tensor& self,
    const double from,
    const double to,
    const std::optional<at::Generator> /* not implemented */) {
  TORCH_CHECK(
      self.is_vulkan(),
      "Vulkan: In-place operator is only supported on Vulkan tensors.");

  api::Context* const context = api::context();

  vTensor& v_self = convert(self);

  // Shader parameters; the layout must match the uniform block declared in
  // the uniform_ compute shader.
  const struct Block final {
    uvec3 extents;
    float from;
    float to;
  } block{v_self.extents(), static_cast<float>(from), static_cast<float>(to)};

  api::UniformParamsBuffer params(context, block);
  api::PipelineBarrier pipeline_barrier{};

  context->submit_compute_job(
      // shader descriptor
      VK_KERNEL(uniform_),
      // pipeline barrier
      pipeline_barrier,
      // global work group size
      v_self.extents(),
      // local work group size
      adaptive_work_group_size(v_self.extents()),
      // fence handle
      VK_NULL_HANDLE,
      // shader arguments
      v_self.image(
          pipeline_barrier,
          api::PipelineStage::COMPUTE,
          api::MemoryAccessType::WRITE),
      // params buffer
      params.buffer());

  return self;
}
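
// A minimal usage sketch (not part of this file's logic; `t` is a
// hypothetical tensor and a Vulkan-capable build is assumed):
//
//   at::Tensor t = at::empty({2, 3, 4}).vulkan(); // move storage to Vulkan
//   t.uniform_(-1.0, 1.0); // dispatches through aten::uniform_ to this kernel
//   at::Tensor checked = t.cpu(); // copy back to inspect the samples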

static Tensor rand_like(
    const at::Tensor& input_arg,
    const std::optional<c10::ScalarType> /* not implemented */,
    const std::optional<c10::Layout> /* not implemented */,
    const std::optional<c10::Device> /* not implemented */,
    const std::optional<bool> /* not implemented */,
    const std::optional<c10::MemoryFormat> /* not implemented */) {
  // Returns a tensor with the same size as input that is filled with random
  // numbers from a uniform distribution on the interval [0, 1). Unlike the
  // CPU implementation, which samples from the half-open interval, the Vulkan
  // shader samples from the closed interval [0, 1]; we tolerate the small
  // chance of 1 being drawn.
  return input_arg.clone().detach().uniform_(0.0, 1.0);
}
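
// A minimal usage sketch (hypothetical tensors; a Vulkan-capable build is
// assumed):
//
//   at::Tensor v_input = at::rand({5, 5}).vulkan();
//   at::Tensor v_output = at::rand_like(v_input); // same shape, values in [0, 1]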

static Tensor& normal_(
    Tensor& self,
    const double mean,
    const double std,
    const std::optional<at::Generator> /* not implemented */) {
  TORCH_CHECK(
      self.is_vulkan(),
      "Vulkan: In-place operator is only supported on Vulkan tensors.");

  TORCH_CHECK(std >= 0, "Vulkan: Standard deviation (std) cannot be negative.");

  api::Context* const context = api::context();

  vTensor& v_self = convert(self);

  // Shader parameters; the layout must match the uniform block declared in
  // the normal_ compute shader.
  const struct Block final {
    uvec3 extents;
    float mean;
    float std;
  } block{v_self.extents(), static_cast<float>(mean), static_cast<float>(std)};

  api::UniformParamsBuffer params(context, block);
  api::PipelineBarrier pipeline_barrier{};

  context->submit_compute_job(
      // shader descriptor
      VK_KERNEL(normal_),
      // pipeline barrier
      pipeline_barrier,
      // global work group size
      v_self.extents(),
      // local work group size
      adaptive_work_group_size(v_self.extents()),
      // fence handle
      VK_NULL_HANDLE,
      // shader arguments
      v_self.image(
          pipeline_barrier,
          api::PipelineStage::COMPUTE,
          api::MemoryAccessType::WRITE),
      // params buffer
      params.buffer());

  return self;
}
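
// A minimal usage sketch (hypothetical tensor; a Vulkan-capable build is
// assumed):
//
//   at::Tensor t = at::empty({4, 4}).vulkan();
//   t.normal_(/*mean=*/0.0, /*std=*/2.0); // Gaussian samples; std must be >= 0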

static Tensor randn_like(
    const at::Tensor& input_arg,
    const std::optional<c10::ScalarType> /* not implemented */,
    const std::optional<c10::Layout> /* not implemented */,
    const std::optional<c10::Device> /* not implemented */,
    const std::optional<bool> /* not implemented */,
    const std::optional<c10::MemoryFormat> /* not implemented */) {
  // Returns a tensor with the same size as input that is filled with random
  // numbers from a normal distribution with mean 0 and standard deviation 1.
  return input_arg.clone().detach().normal_(0.0, 1.0);
}
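
// A minimal usage sketch (hypothetical tensor; a Vulkan-capable build is
// assumed):
//
//   at::Tensor v_input = at::rand({3, 3}).vulkan();
//   at::Tensor v_output = at::randn_like(v_input); // standard normal samples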

// Register the Vulkan implementations with the ATen dispatcher so that calls
// on Vulkan tensors route to the kernels above.
TORCH_LIBRARY_IMPL(aten, Vulkan, m) {
  m.impl(TORCH_SELECTIVE_NAME("aten::uniform_"), TORCH_FN(uniform_));
  m.impl(TORCH_SELECTIVE_NAME("aten::rand_like"), TORCH_FN(rand_like));
  m.impl(TORCH_SELECTIVE_NAME("aten::normal_"), TORCH_FN(normal_));
  m.impl(TORCH_SELECTIVE_NAME("aten::randn_like"), TORCH_FN(randn_like));
}

#endif /* USE_VULKAN_API */

} // namespace ops
} // namespace vulkan
} // namespace native
} // namespace at