/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/cpu/unique_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
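// Inputs with at least this many elements take the bucket-based path when sorted output is requested.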
constexpr size_t kBucketSortThreshold = 100000;
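// Caches the 1-D input length, the input dtype and the optional "sorted" attribute from the kernel node.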
void UniqueCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  node_wpt_ = kernel_node;
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  if (input_shape.size() != 1) {
    MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but UniqueCPUKernel only supports 1-D.";
  }
  input_size_ = input_shape[0];
  dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
  if (AnfAlgo::HasNodeAttr(SORTED, kernel_node)) {
    sorted_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, SORTED);
  }
}

void UniqueCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
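  // Three workspaces, one int64_t slot per input element: the input index buffer plus two scratch buffers.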
  (void)workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t));
  (void)workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t));
  (void)workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t));
}

bool UniqueCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                             const std::vector<kernel::AddressPtr> &workspace,
                             const std::vector<kernel::AddressPtr> &outputs) {
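  // Dispatch on the input dtype; float16 inputs are routed to the float32 instantiation.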
  if (dtype_ == kNumberTypeInt32) {
    LaunchKernel<int, int>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeInt64) {
    LaunchKernel<int64_t, int64_t>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat16) {
    LaunchKernel<float, int>(inputs, workspace, outputs);
  } else {
    MS_LOG(EXCEPTION) << "Unsupported input data type: " << dtype_;
  }
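  // The number of unique elements is only known after execution, so refresh the node's inferred output shape here.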
  if (!node_wpt_.expired()) {
    auto node_ = node_wpt_.lock();
    if (!node_) {
      MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
    }
    std::vector<size_t> out_shape;
    (void)out_shape.emplace_back(output_size_);
    size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
    std::vector<TypeId> dtypes(output_num);
    for (size_t i = 0; i < output_num; i++) {
      dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
    }
    AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, AnfAlgo::GetOutputInferShape(node_, 1)}, node_.get());
  }
  return true;
}

template <typename DataType, typename IndexType>
void UniqueCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                                   const std::vector<AddressPtr> &outputs) {
  if (input_size_ == 0) {
    return;
  }
  if (inputs.size() < 1) {
    MS_LOG(EXCEPTION) << "Input size should be larger than 0!";
  }
  if (workspace.size() < 3) {
    MS_LOG(EXCEPTION) << "Workspace size should be larger than 2!";
  }
  if (outputs.size() < 2) {
    MS_LOG(EXCEPTION) << "Output size should be larger than 1!";
  }
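  // Wire the raw input, workspace and output buffers into the parameter block consumed by Unique/BucketUnique.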
  auto params = std::make_shared<UniqueParam<DataType, IndexType>>();
  params->input_ = reinterpret_cast<DataType *>(inputs[0]->addr);
  params->input_idx_ = reinterpret_cast<IndexType *>(workspace[0]->addr);
  params->workspace_ = reinterpret_cast<DataType *>(workspace[1]->addr);
  params->workspace_idx_ = reinterpret_cast<IndexType *>(workspace[2]->addr);
  params->output_ = reinterpret_cast<DataType *>(outputs[0]->addr);
  params->inverse_idx_ = reinterpret_cast<IndexType *>(outputs[1]->addr);
  params->input_size_ = static_cast<IndexType>(input_size_);
  params->output_size_ = 0;

  params->thread_num_ = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
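  // Sorted output: inputs below kBucketSortThreshold use the plain Unique path, larger ones the bucket-based BucketUnique path.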
  if (sorted_) {
    params->need_sort_ = true;
    if (input_size_ < kBucketSortThreshold) {
      Unique(params);
    } else {
      BucketUnique(params);
    }
  } else {
    params->need_sort_ = false;
    Unique(params);
  }
  output_size_ = static_cast<size_t>(params->output_size_);
}
}  // namespace kernel
}  // namespace mindspore