/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/cpu/unique_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
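// When a sorted result is requested, inputs shorter than this threshold take the plain Unique path; longer inputs
// take the bucket-based BucketUnique path.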
constexpr size_t kBucketSortThreshold = 100000;
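// Caches the kernel node, validates that the input is 1-D, and records the input length, the input device dtype,
// and the optional 'sorted' attribute.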
void UniqueCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  node_wpt_ = kernel_node;
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  if (input_shape.size() != 1) {
    MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but UniqueCPUKernel only supports 1-D input.";
  }
  input_size_ = input_shape[0];
  dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
  if (AnfAlgo::HasNodeAttr(SORTED, kernel_node)) {
    sorted_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, SORTED);
  }
}

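// Registers three workspace buffers, each sized to hold input_size_ int64_t elements, on top of the sizes computed
// by the base class.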
void UniqueCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  (void)workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t));
  (void)workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t));
  (void)workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t));
}

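// Dispatches LaunchKernel for the input dtype, then refreshes the inferred output shape on the node because the
// number of unique elements is only known after execution.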
bool UniqueCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                             const std::vector<kernel::AddressPtr> &workspace,
                             const std::vector<kernel::AddressPtr> &outputs) {
  if (dtype_ == kNumberTypeInt32) {
    LaunchKernel<int, int>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeInt64) {
    LaunchKernel<int64_t, int64_t>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat16) {
    LaunchKernel<float, int>(inputs, workspace, outputs);
  } else {
    MS_LOG(EXCEPTION) << "Unsupported input data type: " << dtype_;
  }
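  // Unique has a data-dependent output size, so update the node's inferred shape for the first output after the
  // kernel has run; the second output (the inverse indices) keeps its previously inferred shape.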
  if (!node_wpt_.expired()) {
    auto node_ = node_wpt_.lock();
    if (!node_) {
      MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
    }
    std::vector<size_t> out_shape;
    (void)out_shape.emplace_back(output_size_);
    size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
    std::vector<TypeId> dtypes(output_num);
    for (size_t i = 0; i < output_num; i++) {
      dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
    }
    AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, AnfAlgo::GetOutputInferShape(node_, 1)}, node_.get());
  }
  return true;
}

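// Fills a UniqueParam with the input, workspace and output buffers and runs the unique computation; the number of
// unique elements produced is stored in output_size_.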
template <typename DataType, typename IndexType>
void UniqueCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                                   const std::vector<AddressPtr> &outputs) {
  if (input_size_ == 0) {
    return;
  }
  if (inputs.size() < 1) {
    MS_LOG(EXCEPTION) << "Input size should be larger than 0, but got " << inputs.size() << "!";
  }
  if (workspace.size() < 3) {
    MS_LOG(EXCEPTION) << "Workspace size should be larger than 2, but got " << workspace.size() << "!";
  }
  if (outputs.size() < 2) {
    MS_LOG(EXCEPTION) << "Output size should be larger than 1, but got " << outputs.size() << "!";
  }
  auto params = std::make_shared<UniqueParam<DataType, IndexType>>();
  params->input_ = reinterpret_cast<DataType *>(inputs[0]->addr);
  params->input_idx_ = reinterpret_cast<IndexType *>(workspace[0]->addr);
  params->workspace_ = reinterpret_cast<DataType *>(workspace[1]->addr);
  params->workspace_idx_ = reinterpret_cast<IndexType *>(workspace[2]->addr);
  params->output_ = reinterpret_cast<DataType *>(outputs[0]->addr);
  params->inverse_idx_ = reinterpret_cast<IndexType *>(outputs[1]->addr);
  params->input_size_ = static_cast<IndexType>(input_size_);
  params->output_size_ = 0;

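  // Use the thread pool's sync-run thread count; when a sorted result is requested, choose between the plain and
  // bucket-based unique paths based on the input length.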
  params->thread_num_ = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
  if (sorted_) {
    params->need_sort_ = true;
    if (input_size_ < kBucketSortThreshold) {
      Unique(params);
    } else {
      BucketUnique(params);
    }
  } else {
    params->need_sort_ = false;
    Unique(params);
  }
  output_size_ = static_cast<size_t>(params->output_size_);
}
}  // namespace kernel
}  // namespace mindspore