/third_party/mindspore/mindspore/nn/optim/

D | adam.py
     94  … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):  [argument]
    102  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    104  success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    109  … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    142  lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
    155  beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param,  [argument]
    161  … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
    164  …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
    171  def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment…  [argument]
    174  delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)
    [all …]
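The expression at line 142 is the bias-corrected step size from the Adam algorithm, and the same formula recurs in the CPU, GPU, and Lite kernels listed below. A minimal NumPy sketch of the dense update these sites implement; `adam_step` and its argument order are my own, not MindSpore API:

```python
import numpy as np

def adam_step(param, m, v, grad, lr, beta1, beta2, eps, beta1_power, beta2_power):
    # Fold both bias corrections into the step size, as adam.py line 142 does.
    lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)
    # Exponential moving averages of the gradient and its square.
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad * grad
    # eps keeps the denominator away from zero.
    param = param - lr_t * m / (np.sqrt(v) + eps)
    return param, m, v
```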
D | lazyadam.py
     34  …th_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, beta2_power,  [argument]
     43  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
     45  success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
     50  … success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
     64  lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
     81  …ne_number(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, beta2_power,  [argument]
     87  … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
     90  …success = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, bet…
    246  self.beta2_power = Parameter(initializer(1, [1], mstype.float32), name="beta2_power")
    268  self.beta2_power = self.beta2_power * self.beta2
    [all …]
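Lines 246 and 268 show where `beta2_power` comes from: it is initialized to 1 and multiplied by `beta2` once per optimizer step, so after t steps it equals beta2**t, exactly the power the bias-correction formula needs. A pure-Python sketch of that bookkeeping (variable names assumed):

```python
beta1, beta2 = 0.9, 0.999
beta1_power, beta2_power = 1.0, 1.0   # initialized to 1, as at lazyadam.py line 246

for step in range(1, 4):
    beta1_power *= beta1              # one multiply per step, as at line 268
    beta2_power *= beta2
    assert abs(beta1_power - beta1 ** step) < 1e-12
    assert abs(beta2_power - beta2 ** step) < 1e-12
```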
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32_grad/

D | adam.cc
     35  … float beta2_power, float eps, float learning_rate, bool nesterov, int start, int end) {  [in DoAdam(), argument]
     40  if ((1.f - beta2_power) < 0.0f) {  [in DoAdam()]
     45  auto update_lr = learning_rate * std::sqrt(1.f - beta2_power) / (1.f - beta1_power);  [in DoAdam()]
     70  auto beta2_power = reinterpret_cast<float *>(in_tensors_.at(4)->MutableData())[0];  [in Execute(), local]
     87  return DoAdam(m, v, gradient, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate,  [in Execute()]
    141  auto beta2_power = reinterpret_cast<float *>(in_tensors_.at(4)->MutableData())[0];  [in OptimizerStep(), local]
    155  … ret = DoAdam(m, v, grad_sum_, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate,  [in OptimizerStep()]
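Line 40 of DoAdam() rejects the update when (1 - beta2_power) is negative, which would make the sqrt at line 45 undefined; that can only happen if the caller passes beta2_power > 1. A hedged Python sketch of the same validation (function name and error handling are mine):

```python
import math

def corrected_lr(lr, beta1_power, beta2_power):
    # Mirrors the guard at adam.cc line 40: sqrt needs a non-negative argument,
    # so beta2_power must not exceed 1.
    if 1.0 - beta2_power < 0.0:
        raise ValueError("beta2_power must not exceed 1")
    # Matches update_lr at line 45; assumes the powers were advanced at least
    # once, so beta1_power < 1 and the division is safe.
    return lr * math.sqrt(1.0 - beta2_power) / (1.0 - beta1_power)
```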
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/

D | adam_impl.cu
    30  …d ApplyAdamKernel(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power,  [in ApplyAdamKernel(), argument]
    34  …const T new_learning_rate = learning_rate[0] * SqrtFunc(one - beta2_power[0]) / (one - beta1_power…  [in ApplyAdamKernel()]
    72  void ApplyAdam(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, co…  [in ApplyAdam(), argument]
    75  size, gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, variable, m, v);  [in ApplyAdam()]
    86  … const float *beta2_power, const float *learning_rate, const float *beta1,
    89  …am<half>(const size_t size, const half *gradient, const half *beta1_power, const half *beta2_power,
D | adam_impl.cuh
    22  void ApplyAdam(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, co…
/third_party/mindspore/tests/st/fl/mobile/src/

D | adam.py
    109  … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):  [argument]
    117  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    119  success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    124  … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    160  lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
    176  beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param,  [argument]
    182  … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
    185  …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
    192  def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment…  [argument]
    195  delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)
/third_party/mindspore/tests/st/fl/albert/src/

D | adam.py
    110  … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):  [argument]
    118  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    120  success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    125  … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    161  lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
    176  beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param,  [argument]
    182  … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
    185  …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
    192  def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment…  [argument]
    195  delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)
/third_party/mindspore/tests/st/fl/hybrid_lenet/src/

D | adam.py
    109  … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):  [argument]
    117  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    119  success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    124  … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    159  lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
    175  beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param,  [argument]
    181  … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
    184  …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
    191  def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment…  [argument]
    194  delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)
/third_party/mindspore/tests/ut/python/optimizer/

D | test_auto_grad.py
    292  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):  [argument]
    293  …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
    303  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):  [argument]
    304  out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
    305  …gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
    306  …gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
    313  beta2_power = Tensor(np.array([0.999], dtype=np.float32))
    321  grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
    333  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):  [argument]
    334  …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
    [all …]
/third_party/mindspore/tests/st/auto_monad/

D | test_auto_monad_expression.py
    34  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):  [argument]
    36  …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
    70  beta2_power = Tensor(0.999, ms.float32)
    76  out, new_var, new_m, new_v = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
    79  …out_pyn, new_var_pyn, new_m_pyn, new_v_pyn = net(beta1_power, beta2_power, lr, beta1, beta2, epsil…
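These tests exercise the fused ApplyAdam primitive directly rather than going through the nn.Adam optimizer. A minimal usage sketch in the same style, not verified against a particular MindSpore release; the P.Adam call order is taken from the snippets, while the class name, shapes, and hyperparameter values are illustrative:

```python
import numpy as np
import mindspore as ms
from mindspore import nn, Tensor, Parameter
from mindspore.ops import operations as P

class AdamNet(nn.Cell):
    def __init__(self, var, m, v):
        super().__init__()
        self.apply_adam = P.Adam()  # fused ApplyAdam primitive
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")
        self.v = Parameter(v, name="v")

    def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
        # Updates var, m, and v in place as a side effect, as in the tests above.
        self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power,
                        lr, beta1, beta2, epsilon, grad)
        return self.var

shape = (2, 2)
net = AdamNet(Tensor(np.ones(shape), ms.float32),
              Tensor(np.zeros(shape), ms.float32),
              Tensor(np.zeros(shape), ms.float32))
out = net(Tensor(0.9, ms.float32), Tensor(0.999, ms.float32),   # beta1_power, beta2_power
          Tensor(0.001, ms.float32), Tensor(0.9, ms.float32),   # lr, beta1
          Tensor(0.999, ms.float32), Tensor(1e-8, ms.float32),  # beta2, epsilon
          Tensor(np.ones(shape), ms.float32))                   # grad
```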
D | test_effect_optimizer.py
     34  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):  [argument]
     36  beta2_power, lr, beta1, beta2, epsilon, grad)
     51  beta2_power = Tensor(0.999, mstype.float32)
     58  beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
    466  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):  [argument]
    467  self.fused_sparse_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
    483  beta2_power = Tensor(0.999, mstype.float32)
    491  beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
    536  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):  [argument]
    537  self.fused_sparse_lazyadam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1,
    [all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/

D | adam_cpu_kernel.cc
    37  float beta2_power = reinterpret_cast<float *>(inputs[BETA2_POWER]->addr)[kScalarIndex];  [in LaunchAdam(), local]
    47  T new_lr = static_cast<T>(lr * std::sqrt(ONE - beta2_power) / (ONE - beta1_power));  [in LaunchAdam()]
    72  float beta2_power = reinterpret_cast<float *>(inputs[BETA2_POWER]->addr)[kScalarIndex];  [in LaunchAdamNnacl(), local]
    82  float new_lr = lr * std::sqrt(ONE - beta2_power) / (ONE - beta1_power);  [in LaunchAdamNnacl()]
D | adam_delta_cpu_kernel.cc
    120  auto beta2_power = reinterpret_cast<float *>(inputs[3]->addr)[0];  [in Launch(), local]
    132  lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);  [in Launch()]
D | sparse_apply_lazy_adam_cpu_kernel.cc
    132  auto beta2_power = reinterpret_cast<float *>(inputs[4]->addr)[0];  [in LaunchKernel(), local]
    155  lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);  [in LaunchKernel()]
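If this kernel follows the usual lazy-Adam convention (as in other frameworks), it differs from sparse_apply_adam_cpu_kernel.cc below in which rows it touches: the lazy variant updates m, v, and the weight only for rows that appear in the gradient's indices, while the non-lazy sparse kernel still applies the moment decay to every row. A NumPy sketch of the lazy row-wise update, under that assumption and with my own names:

```python
import numpy as np

def lazy_adam_rows(param, m, v, grad_values, indices,
                   lr, beta1, beta2, eps, beta1_power, beta2_power):
    # Same bias-corrected step size as the dense kernels.
    lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)
    for g, row in zip(grad_values, indices):
        # Only rows named in `indices` are touched; every other row keeps
        # stale m/v, which is what makes the variant "lazy".
        m[row] = beta1 * m[row] + (1 - beta1) * g
        v[row] = beta2 * v[row] + (1 - beta2) * g * g
        param[row] -= lr_t * m[row] / (np.sqrt(v[row]) + eps)
```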
D | sparse_apply_adam_cpu_kernel.cc
    152  auto beta2_power = reinterpret_cast<float *>(inputs[4]->addr)[0];  [in LaunchKernel(), local]
    177  lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);  [in LaunchKernel()]
/third_party/mindspore/tests/st/ops/ascend/test_aicpu_ops/

D | test_fused_sparse_lazy_adam.py
    26  beta2_power = 0.999  [variable]
    41  return self.fused_sparse_lazy_adam(self.var, self.m, self.v, beta1_power, beta2_power,
D | test_fused_sparse_adam.py
    26  beta2_power = 0.999  [variable]
    41  …return self.fused_sparse_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2…
/third_party/mindspore/tests/st/ops/cpu/

D | test_sparse_apply_adam_op.py
    26  beta2_power = 0.999  [variable]
    42  …out = self.sparse_apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,…
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/

D | adam_gpu_kernel.h
    58  T *beta2_power = GetDeviceAddress<T>(inputs, 4);  [in Launch(), local]
    64  …ApplyAdam(inputs[0]->size / sizeof(T), gradient, beta1_power, beta2_power, learning_rate, beta1, b…  [in Launch()]
/third_party/mindspore/mindspore/ccsrc/ps/

D | optimizer_info_builder.cc
    186  …AddressPtr beta2_power = GenInputAddrPtr<float>(kSparseAdam, "beta2_power", const_cast<float *>(va…  [in BuildInputs(), local]
    187  MS_EXCEPTION_IF_NULL(beta2_power);  [in BuildInputs()]
    201  …return new SparseAdamOptimInfo(weight_addr, m, v, beta1_power, beta2_power, learning_rate, beta1, …  [in BuildInputs()]
D | optimizer_info.cc
    306  … const AddressPtr &beta1_power, const AddressPtr &beta2_power,  [in SparseAdamOptimInfo(), argument]
    314  MS_EXCEPTION_IF_NULL(beta2_power);  [in SparseAdamOptimInfo()]
    325  inputs_.push_back(beta2_power);  [in SparseAdamOptimInfo()]
D | optimizer_info.h
    103  … const AddressPtr &beta2_power, const AddressPtr &learning_rate, const AddressPtr &beta1,
/third_party/mindspore/mindspore/ccsrc/transform/graph_ir/op_declare/

D | nn_training_ops_declare.cc
    43  … {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)},
    52  … {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)},
/third_party/mindspore/config/

D | op_info.config
     54  …name": "beta1_power", "param_type": "required"}, {"index": 4, "name": "beta2_power", "param_type":…
     55  …name": "beta1_power", "param_type": "required"}, {"index": 4, "name": "beta2_power", "param_type":…
     73  …name": "beta1_power", "param_type": "required"}, {"index": 4, "name": "beta2_power", "param_type":…
    168  …alse, "param_type": "required", "shape": "all"}, {"index": 4, "name": "beta2_power", "need_compile…
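op_info.config registers beta2_power at index 4, agreeing with the inputs[4], in_tensors_.at(4), and GetDeviceAddress(inputs, 4) reads in the kernels above; nn_training_ops_declare.cc shows the same slot as {5, INPUT_DESC(beta2_power)} because the graph-IR descriptors count from 1, and adam_delta_cpu_kernel.cc reads inputs[3] because its delta variant has no leading var input. A sketch of the assumed 0-based layout of the fused dense Adam inputs (the dict name is mine):

```python
# Assumed input ordering of the fused dense Adam kernels, inferred from the
# index-4 accesses in this listing; the sparse variants append an `indices`
# tensor after the gradient.
ADAM_INPUT_INDEX = {
    "var": 0, "m": 1, "v": 2,
    "beta1_power": 3, "beta2_power": 4,
    "lr": 5, "beta1": 6, "beta2": 7,
    "epsilon": 8, "gradient": 9,
}

assert ADAM_INPUT_INDEX["beta2_power"] == 4  # matches inputs[4] in the kernels
```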