/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17 #include "ps/optimizer_info.h"
18 #include <map>
19 #include <memory>
20 #include <string>
21 #include <functional>
22 #include "ps/util.h"
23 
namespace mindspore {
namespace ps {
void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) {
  MS_EXCEPTION_IF_NULL(workspace);
  workspaces_.push_back(workspace);
}

const std::vector<AddressPtr> &OptimizerInfo::inputs() const { return inputs_; }

const std::vector<AddressPtr> &OptimizerInfo::workspaces() const { return workspaces_; }

const std::vector<AddressPtr> &OptimizerInfo::outputs() const { return outputs_; }

bool OptimizerInfo::IsSparse() const { return false; }

const size_t OptimizerInfo::indice_size() const { return 0; }

size_t OptimizerInfo::grad_index() { return 0; }

size_t OptimizerInfo::indices_index() { return 0; }

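// Copies a single optimizer input (typically a scalar hyperparameter such as the learning rate)
// out of the flattened buffer pushed by the worker: `optim_type` selects the index maps
// (kOptimToOriginIdx / kOptimToPSSendIdx), `input_name` selects the entry, `lens` holds the element
// count of each segment in `data`, and the matching segment is copied into inputs_[origin_index].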
template <typename T>
void OptimizerInfo::UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data,
                                          const Lengths &lens) {
  MS_EXCEPTION_IF_NULL(data);
  if (kOptimToOriginIdx.count(optim_type) == 0 || kOptimToPSSendIdx.count(optim_type) == 0) {
    MS_LOG(EXCEPTION) << "Optimizer type " << optim_type << " is not supported.";
  }
  const OptimOriginIdx &origin_input_map = kOptimToOriginIdx.at(optim_type);
  const OptimPSSendIdx &ps_send_index_map = kOptimToPSSendIdx.at(optim_type);
  if (ps_send_index_map.count(input_name) == 0 || origin_input_map.count(input_name) == 0) {
    MS_LOG(EXCEPTION) << "Optimizer " << optim_type << " has no input for " << input_name;
  }

  size_t origin_index = origin_input_map.at(input_name);
  size_t ps_send_index = ps_send_index_map.at(input_name);
  if (ps_send_index >= lens.size() || origin_index >= inputs_.size()) {
    MS_LOG(EXCEPTION) << "Index is out of bound for optimizer " << optim_type << ", origin_index:" << origin_index
                      << ", ps_send_index:" << ps_send_index;
  }
  EXC_IF_VEC_IDX_OOB(lens, ps_send_index);
  size_t size = IntToSize(lens[ps_send_index]) * sizeof(T);
  int offset = std::accumulate(lens.begin(), lens.begin() + SizeToInt(ps_send_index), 0, std::plus<int>());
  AddressPtr optim_input = inputs_[origin_index];
  MS_EXCEPTION_IF_NULL(optim_input);

  void *dst_data = optim_input->addr;
  T *src_data = reinterpret_cast<T *>(data) + offset;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  int64_t ret = memcpy_s(optim_input->addr, optim_input->size, src_data, size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }
  return;
}

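// Dense accumulation: the gradient segment inside `values` (located via grad_index() and `lengths`)
// is added element-wise onto the locally accumulated gradient buffer.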
void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
  MS_EXCEPTION_IF_NULL(gradient()->addr);
  float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
  size_t size = gradient()->size / sizeof(float);
  size_t grad_index = this->grad_index();
  size_t grad_offset = 0;
  for (size_t i = 0; i < grad_index; i++) {
    grad_offset += IntToSize(lengths[i]);
  }
  float *grad_data = const_cast<float *>(values.data()) + grad_offset;
  MS_EXCEPTION_IF_NULL(grad_data);
#define google mindspore_private
  CHECK_EQ(size, IntToSize(lengths[grad_index]));
#undef google
  for (size_t i = 0; i < size; i++) {
    accum_grad_data[i] += grad_data[i];
  }
}

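// Averages the accumulated dense gradient over the n contributing workers; the shape, server_num
// and rank_id parameters of the interface are not needed in the dense case.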
void DenseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &, size_t n, size_t, size_t) {
  if (n > 1) {
    MS_EXCEPTION_IF_NULL(gradient()->addr);
    float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
    size_t size = gradient()->size / sizeof(float);
    for (size_t i = 0; i < size; i++) {
      accum_grad_data[i] /= n;
    }
  }
}

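// Zeroes the accumulated dense gradient so the next accumulation round starts from scratch.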
void DenseOptimInfo::Reset() {
  MS_EXCEPTION_IF_NULL(gradient()->addr);
  int64_t ret = memset_s(gradient()->addr, gradient()->size, 0x00, gradient()->size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
    return;
  }
}

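// Sparse accumulation does not sum in place: incoming gradient rows and their indices are appended
// behind the data gathered so far (tracked by grads_offset_ / indices_offset_); duplicate rows are
// reduced later in ComputeMean().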
void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
  // Append grad data to the end
  MS_EXCEPTION_IF_NULL(gradient()->addr);
  float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);

  size_t grad_index = this->grad_index();
  size_t grad_offset = 0;
  for (size_t i = 0; i < grad_index; i++) {
    grad_offset += IntToSize(lengths[i]);
  }
  float *incr_grad_data = const_cast<float *>(values.data()) + grad_offset;
  MS_EXCEPTION_IF_NULL(incr_grad_data);

  size_t incr_grad_size = IntToSize(lengths[grad_index]) * sizeof(float);
  size_t dst_size = incr_grad_size;
  size_t src_size = incr_grad_size;
  void *dst_data = accum_grad_data + grads_offset_;
  void *src_data = incr_grad_data;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  int64_t ret = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }
  grads_offset_ += IntToSize(lengths[grad_index]);
  gradient()->size += incr_grad_size;

  // Append indice data to the end
  MS_EXCEPTION_IF_NULL(indices()->addr);
  int *accum_indices_data = reinterpret_cast<int *>(indices()->addr);
  MS_EXCEPTION_IF_NULL(accum_indices_data);

  size_t indices_index = this->indices_index();
  size_t indice_offset = 0;
  for (size_t i = 0; i < indices_index; i++) {
    indice_offset += IntToSize(lengths[i]);
  }

  void *incr_indice_data_temp = const_cast<float *>(values.data()) + indice_offset;
  MS_EXCEPTION_IF_NULL(incr_indice_data_temp);
  int *incr_indice_data = reinterpret_cast<int *>(incr_indice_data_temp);
  MS_EXCEPTION_IF_NULL(incr_indice_data);

  size_t incr_indice_size = lengths[indices_index];
  size_t incr_indice_data_size = incr_indice_size * sizeof(int);
  dst_size = incr_indice_data_size;
  src_size = incr_indice_data_size;
  dst_data = accum_indices_data + indices_offset_;
  src_data = incr_indice_data;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  auto ret2 = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret2 != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
    return;
  }
  indices_offset_ += IntToSize(lengths[indices_index]);
  indices()->size += incr_indice_data_size;
}

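// Post-processes the appended sparse gradient: when the embedding rows are sharded across servers,
// the global row indices are first shifted into this server's local range; the rows are then
// deduplicated via Util::ReduceSparseGradient, copied back over the accumulation buffers, and
// divided by n to obtain the mean.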
void SparseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
                                  size_t rank_id) {
  if (n == 0 || indices()->size == 0) {
    MS_LOG(EXCEPTION) << "The size of shapes or indices is 0.";
  }
  size_t indices_size = static_cast<size_t>(indices()->size / sizeof(int));
  size_t segment_size = gradient()->size / indices()->size;

  std::vector<float> new_grad(indices_size * segment_size);
  std::vector<int> new_indices(indices_size);
  mindspore::kernel::SparseGradient<int> unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size});

  if (shapes.size() < 2 || shapes[1].empty()) {
    MS_LOG(EXCEPTION) << "No input shape found";
  }
  auto input_shapes = shapes[1];
  if (input_shapes.size() == 0) {
    MS_LOG(EXCEPTION) << "Invalid input shapes";
  }
  size_t first_dim_size = input_shapes.front();
  size_t outer_dim_size = segment_size;

  if (first_dim_size == 0 || outer_dim_size == 0) {
    MS_LOG(ERROR) << "Invalid first dim size";
  }

  MS_EXCEPTION_IF_NULL(gradient()->addr);
  MS_EXCEPTION_IF_NULL(indices()->addr);
  float *grad_data = reinterpret_cast<float *>(gradient()->addr);
  int *indices_data = reinterpret_cast<int *>(indices()->addr);

  if (sharded_) {
    size_t original_row_count = input_shapes.front();
    if (original_row_count > 0) {
      size_t offset = 0;
      std::map<int64_t, int64_t> rank_dims =
        Util::AllRankLocalShard(SizeToLong(original_row_count), SizeToLong(rank_id), SizeToLong(server_num));
      for (size_t i = 0; i < rank_id; i++) {
        if (rank_dims.count(i) == 0) {
          MS_LOG(EXCEPTION) << "No local shard number for rank " << i;
        }
        offset += LongToSize(rank_dims[i]);
      }
      for (size_t j = 0; j < indices_size; j++) {
        indices_data[j] -= SizeToInt(offset);
      }
    }
  }

  Util::ReduceSparseGradient(grad_data, indices_data, indices_size, segment_size, first_dim_size, outer_dim_size,
                             &unique_sparse_grad);

  size_t reduced_grad_size = unique_sparse_grad.indices_size_ * segment_size * sizeof(float);
  MS_EXCEPTION_IF_NULL(unique_sparse_grad.value_);
  int ret = memcpy_s(gradient()->addr, gradient()->size, unique_sparse_grad.value_, reduced_grad_size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }

  size_t reduced_indice_size = unique_sparse_grad.indices_size_ * sizeof(int);
  MS_EXCEPTION_IF_NULL(unique_sparse_grad.indices_);
  ret = memcpy_s(indices()->addr, indices()->size, unique_sparse_grad.indices_, reduced_indice_size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }

  gradient()->size = reduced_grad_size;
  indices()->size = reduced_indice_size;

  for (size_t i = 0; i < unique_sparse_grad.indices_size_ * segment_size; i++) {
    grad_data[i] = grad_data[i] / n;
  }
}

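// Resets the logical sizes and append offsets; the underlying buffers are kept and reused.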
void SparseOptimInfo::Reset() {
  gradient()->size = 0;
  indices()->size = 0;
  grads_offset_ = 0;
  indices_offset_ = 0;
}

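// Dense ApplyMomentum optimizer info; inputs are stored in the order looked up through
// kMomentumOriginIdx: weight, accumulate, learning_rate, gradient, momentum.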
MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate,
                                     const AddressPtr &learning_rate, const AddressPtr &gradient,
                                     const AddressPtr &momentum) {
  MS_EXCEPTION_IF_NULL(weight);
  MS_EXCEPTION_IF_NULL(accumulate);
  MS_EXCEPTION_IF_NULL(learning_rate);
  MS_EXCEPTION_IF_NULL(gradient);
  MS_EXCEPTION_IF_NULL(momentum);
  inputs_.push_back(weight);
  inputs_.push_back(accumulate);
  inputs_.push_back(learning_rate);
  inputs_.push_back(gradient);
  inputs_.push_back(momentum);
}

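// For ApplyMomentum only the learning rate is refreshed from the values pushed by the worker.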
void MomentumOptimInfo::Update(const Values &values, const Lengths &lens) {
  UpdateOptimInputValue<float>(kApplyMomentum, "lr", const_cast<float *>(values.data()), lens);
}

const size_t SparseOptimInfo::indice_size() const { return indices_offset_; }

const AddressPtr &MomentumOptimInfo::gradient() {
  size_t origin_grad_index = kMomentumOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
  return inputs_[origin_grad_index];
}

const AddressPtr &MomentumOptimInfo::indices() {
  size_t origin_grad_index = kMomentumOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
  return inputs_[origin_grad_index];
}

size_t MomentumOptimInfo::grad_index() {
  size_t ps_grad_index = kMomentumPSSendIdx.at("grad");
  return ps_grad_index;
}

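// Sparse Adam optimizer info: besides the Adam state and scalar hyperparameters, the gradient and
// indices buffers also serve as accumulation buffers, so the append offsets start at their current
// element counts.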
SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v,
                                         const AddressPtr &beta1_power, const AddressPtr &beta2_power,
                                         const AddressPtr &learning_rate, const AddressPtr &beta1,
                                         const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad,
                                         const AddressPtr &indices, bool sharded) {
  MS_EXCEPTION_IF_NULL(weight);
  MS_EXCEPTION_IF_NULL(m);
  MS_EXCEPTION_IF_NULL(v);
  MS_EXCEPTION_IF_NULL(beta1_power);
  MS_EXCEPTION_IF_NULL(beta2_power);
  MS_EXCEPTION_IF_NULL(learning_rate);
  MS_EXCEPTION_IF_NULL(beta1);
  MS_EXCEPTION_IF_NULL(beta2);
  MS_EXCEPTION_IF_NULL(epsilon);
  MS_EXCEPTION_IF_NULL(grad);
  MS_EXCEPTION_IF_NULL(indices);
  inputs_.push_back(weight);
  inputs_.push_back(m);
  inputs_.push_back(v);
  inputs_.push_back(beta1_power);
  inputs_.push_back(beta2_power);
  inputs_.push_back(learning_rate);
  inputs_.push_back(beta1);
  inputs_.push_back(beta2);
  inputs_.push_back(epsilon);
  inputs_.push_back(grad);
  inputs_.push_back(indices);
  grads_offset_ = grad->size / sizeof(float);
  indices_offset_ = indices->size / sizeof(int);
  sharded_ = sharded;
}

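// Refreshes the scalar Adam hyperparameters (beta1_power, beta2_power, lr, beta1, beta2, eps)
// from the flattened values pushed by the worker.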
void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) {
  UpdateOptimInputValue<float>(kSparseAdam, "beta1_power", const_cast<float *>(values.data()), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta2_power", const_cast<float *>(values.data()), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "lr", const_cast<float *>(values.data()), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta1", const_cast<float *>(values.data()), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta2", const_cast<float *>(values.data()), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "eps", const_cast<float *>(values.data()), lens);
}

const AddressPtr &SparseAdamOptimInfo::gradient() {
  size_t origin_grad_index = kSparseAdamOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
  return inputs_[origin_grad_index];
}

const AddressPtr &SparseAdamOptimInfo::indices() {
  size_t origin_indices_index = kSparseAdamOriginIdx.at("indices");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]);
  return inputs_[origin_indices_index];
}

bool SparseAdamOptimInfo::IsSparse() const { return true; }

size_t SparseAdamOptimInfo::grad_index() {
  size_t ps_grad_index = kSparseAdamPSSendIdx.at("grad");
  return ps_grad_index;
}

size_t SparseAdamOptimInfo::indices_index() {
  size_t ps_indices_index = kSparseAdamPSSendIdx.at("indices");
  return ps_indices_index;
}

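// Sparse FTRL optimizer info: weight, accum and linear state plus the gradient/indices buffers,
// which double as accumulation buffers, so the append offsets start at their current element counts.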
SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear,
                                         const AddressPtr &grad, const AddressPtr &indices, bool sharded) {
  MS_EXCEPTION_IF_NULL(weight);
  MS_EXCEPTION_IF_NULL(accum);
  MS_EXCEPTION_IF_NULL(linear);
  MS_EXCEPTION_IF_NULL(grad);
  MS_EXCEPTION_IF_NULL(indices);
  inputs_.push_back(weight);
  inputs_.push_back(accum);
  inputs_.push_back(linear);
  inputs_.push_back(grad);
  inputs_.push_back(indices);
  grads_offset_ = grad->size / sizeof(float);
  indices_offset_ = indices->size / sizeof(int);
  sharded_ = sharded;
}

const AddressPtr &SparseFtrlOptimInfo::gradient() {
  size_t origin_grad_index = kSparseFtrlOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
  return inputs_[origin_grad_index];
}

const AddressPtr &SparseFtrlOptimInfo::indices() {
  size_t origin_indices_index = kSparseFtrlOriginIdx.at("indices");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]);
  return inputs_[origin_indices_index];
}

bool SparseFtrlOptimInfo::IsSparse() const { return true; }

size_t SparseFtrlOptimInfo::grad_index() {
  size_t ps_grad_index = kSparseFtrlPSSendIdx.at("grad");
  return ps_grad_index;
}

size_t SparseFtrlOptimInfo::indices_index() {
  size_t ps_indices_index = kSparseFtrlPSSendIdx.at("indices");
  return ps_indices_index;
}
}  // namespace ps
}  // namespace mindspore