Identifier search: beta1_power (MindSpore source tree)

/third_party/mindspore/mindspore/nn/optim/

  adam.py
     93: …f _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power,  [argument]
    102: op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    104: success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    109: … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    142: lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
    155: beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param,  [argument]
    161: … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
    164: …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
    171: def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment…  [argument]
    174: delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)
    [all …]

  lazyadam.py
     34: …th_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, beta2_power,  [argument]
     43: op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
     45: success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
     50: … success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
     64: lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
     81: …ne_number(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, beta2_power,  [argument]
     87: … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
     90: …success = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, bet…
    245: self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power")
    267: self.beta1_power = self.beta1_power * self.beta1
    [all …]

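Both front ends fold Adam's bias correction into the learning rate (adam.py line 142, lazyadam.py line 64) and advance the power terms once per step (lazyadam.py line 267). A minimal NumPy sketch of that dense update follows; names mirror the snippets above, but the code is illustrative, not MindSpore's implementation:

import numpy as np

def adam_step(param, m, v, grad, lr, beta1, beta2, eps, beta1_power, beta2_power):
    # Moment estimates: exponential moving averages of grad and grad**2.
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad * grad
    # Bias-corrected step size, matching
    # lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power).
    lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)
    param = param - lr_t * m / (np.sqrt(v) + eps)
    return param, m, v

# The caller advances the powers each step, as lazyadam.py line 267 does:
#   beta1_power *= beta1
#   beta2_power *= beta2
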
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32_grad/

  adam.cc
     34: …at *m, float *v, const float *gradient, float *weight, float beta1, float beta2, float beta1_power,  [in DoAdam(), argument]
     36: if ((1.f - beta1_power) <= 0.0f) {  [in DoAdam()]
     45: auto update_lr = learning_rate * std::sqrt(1.f - beta2_power) / (1.f - beta1_power);  [in DoAdam()]
     69: auto beta1_power = reinterpret_cast<float *>(in_tensors_.at(3)->MutableData())[0];  [in Execute(), local]
     87: return DoAdam(m, v, gradient, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate,  [in Execute()]
    140: auto beta1_power = reinterpret_cast<float *>(in_tensors_.at(3)->MutableData())[0];  [in OptimizerStep(), local]
    155: … ret = DoAdam(m, v, grad_sum_, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate,  [in OptimizerStep()]

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/

  adam_impl.cu
     30: __global__ void ApplyAdamKernel(const size_t size, const T *gradient, const T *beta1_power, const T…  [in ApplyAdamKernel(), argument]
     34: …t T new_learning_rate = learning_rate[0] * SqrtFunc(one - beta2_power[0]) / (one - beta1_power[0]);  [in ApplyAdamKernel()]
     72: void ApplyAdam(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, co…  [in ApplyAdam(), argument]
     75: size, gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, variable, m, v);  [in ApplyAdam()]
     85: template void ApplyAdam<float>(const size_t size, const float *gradient, const float *beta1_power,
     89: template void ApplyAdam<half>(const size_t size, const half *gradient, const half *beta1_power, con…

  adam_impl.cuh
     22: void ApplyAdam(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, co…

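The GPU path receives every scalar (learning_rate, beta1_power, and so on) as a one-element device tensor, hence the [0] dereferences at adam_impl.cu line 34, and fuses the moment and parameter updates into a single elementwise kernel. A Python stand-in for what each thread computes; the kernel body beyond the hits above is assumed to be the standard Adam recurrence:

import numpy as np

def apply_adam_fused(size, gradient, beta1_power, beta2_power, learning_rate,
                     beta1, beta2, epsilon, variable, m, v):
    # Scalars arrive as one-element arrays (device pointers in the kernel).
    new_lr = learning_rate[0] * np.sqrt(1.0 - beta2_power[0]) / (1.0 - beta1_power[0])
    for i in range(size):  # one CUDA thread per index in the real kernel
        m[i] += (gradient[i] - m[i]) * (1.0 - beta1[0])       # == beta1*m + (1-beta1)*g
        v[i] += (gradient[i] ** 2 - v[i]) * (1.0 - beta2[0])  # == beta2*v + (1-beta2)*g*g
        variable[i] -= new_lr * m[i] / (np.sqrt(v[i]) + epsilon[0])
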
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/

  adam_cpu_kernel.cc
     36: float beta1_power = reinterpret_cast<float *>(inputs[BETA1_POWER]->addr)[kScalarIndex];  [in LaunchAdam(), local]
     44: if (beta1_power - ONE == 0) {  [in LaunchAdam()]
     47: T new_lr = static_cast<T>(lr * std::sqrt(ONE - beta2_power) / (ONE - beta1_power));  [in LaunchAdam()]
     71: float beta1_power = reinterpret_cast<float *>(inputs[BETA1_POWER]->addr)[kScalarIndex];  [in LaunchAdamNnacl(), local]
     79: if (beta1_power - ONE == 0) {  [in LaunchAdamNnacl()]
     82: float new_lr = lr * std::sqrt(ONE - beta2_power) / (ONE - beta1_power);  [in LaunchAdamNnacl()]

  adam_delta_cpu_kernel.cc
    116: auto beta1_power = reinterpret_cast<float *>(inputs[2]->addr)[0];  [in Launch(), local]
    117: if (beta1_power == 1) {  [in Launch()]
    132: lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);  [in Launch()]

  sparse_apply_lazy_adam_cpu_kernel.cc
    128: auto beta1_power = reinterpret_cast<float *>(inputs[3]->addr)[0];  [in LaunchKernel(), local]
    129: if (beta1_power == 1) {  [in LaunchKernel()]
    155: lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);  [in LaunchKernel()]

  sparse_apply_adam_cpu_kernel.cc
    148: auto beta1_power = reinterpret_cast<float *>(inputs[3]->addr)[0];  [in LaunchKernel(), local]
    149: if (beta1_power == 1) {  [in LaunchKernel()]
    177: lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);  [in LaunchKernel()]

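All four CPU kernels reject beta1_power == 1 before doing any work, since the bias-correction denominator (1 - beta1_power) would be zero, and the two sparse_apply_* kernels only touch the rows named by the gradient indices. A hedged sketch of that lazy row-wise variant; the kernels' actual error handling and NNACL fast paths are omitted:

import numpy as np

def sparse_lazy_adam(var, m, v, grad_rows, indices, lr, beta1, beta2, eps,
                     beta1_power, beta2_power):
    if beta1_power == 1.0:
        raise ValueError("beta1_power must not be 1")  # mirrors the kernels' guard
    lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)
    for row, g in zip(indices, grad_rows):  # only rows with gradients are updated
        m[row] = beta1 * m[row] + (1 - beta1) * g
        v[row] = beta2 * v[row] + (1 - beta2) * g * g
        var[row] -= lr_t * m[row] / (np.sqrt(v[row]) + eps)
    return var, m, v
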
/third_party/mindspore/tests/st/fl/mobile/src/

  adam.py
    108: …f _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power,  [argument]
    117: op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    119: success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    124: … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    160: lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
    176: beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param,  [argument]
    182: … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
    185: …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
    192: def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment…  [argument]
    195: delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)

/third_party/mindspore/tests/st/fl/albert/src/

  adam.py
    109: …f _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power,  [argument]
    118: op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    120: success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    125: … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    161: lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
    176: beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param,  [argument]
    182: … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
    185: …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
    192: def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment…  [argument]
    195: delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)

/third_party/mindspore/tests/st/fl/hybrid_lenet/src/

  adam.py
    108: …f _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power,  [argument]
    117: op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    119: success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    124: … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    159: lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
    175: beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param,  [argument]
    181: … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
    184: …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
    191: def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment…  [argument]
    194: delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)

/third_party/mindspore/tests/ut/python/optimizer/

  test_auto_grad.py
    292: def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):  [argument]
    293: …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
    303: def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):  [argument]
    304: out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
    305: …gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
    306: …gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
    312: beta1_power = Tensor(np.array([0.9], dtype=np.float32))
    321: grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
    333: def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):  [argument]
    334: …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
    [all …]

/third_party/mindspore/tests/st/auto_monad/

  test_auto_monad_expression.py
     34: def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):  [argument]
     36: …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
     69: beta1_power = Tensor(0.9, ms.float32)
     76: out, new_var, new_m, new_v = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
     79: …out_pyn, new_var_pyn, new_m_pyn, new_v_pyn = net(beta1_power, beta2_power, lr, beta1, beta2, epsil…

  test_effect_optimizer.py
     34: def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):  [argument]
     35: self.apply_adam(self.var, self.m, self.v, beta1_power,
     50: beta1_power = Tensor(0.9, mstype.float32)
     58: beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
     71: def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad):  [argument]
     73: beta1_power, lr, beta1, beta2, epsilon, grad)
     87: beta1_power = Tensor(0.9, mstype.float32)
     93: new_var, new_m, new_v = net(beta1_power, lr, beta1, beta2, epsilon, grad)
    466: def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):  [argument]
    467: self.fused_sparse_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
    [all …]

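The test pattern in both files is the same: wrap ops.Adam in a Cell and call it with the argument order shown at line 36. A condensed sketch of that pattern; shapes and constants here are placeholders, not the tests' values:

import numpy as np
import mindspore as ms
from mindspore import nn, ops, Tensor, Parameter

class AdamNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.apply_adam = ops.Adam()
        self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var")
        self.m = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="m")
        self.v = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="v")

    def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
        # Same argument order as the hits above; returns updated (var, m, v).
        return self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power,
                               lr, beta1, beta2, epsilon, grad)

net = AdamNet()
grad = Tensor(np.ones([2, 2]).astype(np.float32))
out = net(Tensor(0.9, ms.float32), Tensor(0.999, ms.float32),   # beta1_power, beta2_power
          Tensor(0.001, ms.float32), Tensor(0.9, ms.float32),   # lr, beta1
          Tensor(0.999, ms.float32), Tensor(1e-8, ms.float32),  # beta2, epsilon
          grad)
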
/third_party/mindspore/tests/st/ops/ascend/test_aicpu_ops/

  test_fused_sparse_lazy_adam.py
     25: beta1_power = 0.9  [variable]
     41: return self.fused_sparse_lazy_adam(self.var, self.m, self.v, beta1_power, beta2_power,

  test_fused_sparse_adam.py
     25: beta1_power = 0.9  [variable]
     41: …return self.fused_sparse_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2…

/third_party/mindspore/tests/st/ops/cpu/

  test_sparse_apply_adam_op.py
     25: beta1_power = 0.9  [variable]
     42: …out = self.sparse_apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,…

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/

  adam_gpu_kernel.h
     57: T *beta1_power = GetDeviceAddress<T>(inputs, 3);  [in Launch(), local]
     64: …ApplyAdam(inputs[0]->size / sizeof(T), gradient, beta1_power, beta2_power, learning_rate, beta1, b…  [in Launch()]

/third_party/mindspore/mindspore/ccsrc/ps/

  optimizer_info_builder.cc
    184: …AddressPtr beta1_power = GenInputAddrPtr<float>(kSparseAdam, "beta1_power", const_cast<float *>(va…  [in BuildInputs(), local]
    185: MS_EXCEPTION_IF_NULL(beta1_power);  [in BuildInputs()]
    201: …return new SparseAdamOptimInfo(weight_addr, m, v, beta1_power, beta2_power, learning_rate, beta1, …  [in BuildInputs()]

  optimizer_info.cc
    306: … const AddressPtr &beta1_power, const AddressPtr &beta2_power,  [in SparseAdamOptimInfo(), argument]
    313: MS_EXCEPTION_IF_NULL(beta1_power);  [in SparseAdamOptimInfo()]
    324: inputs_.push_back(beta1_power);  [in SparseAdamOptimInfo()]

  optimizer_info.h
    102: …(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v, const AddressPtr &beta1_power,

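On the parameter-server side the optimizer inputs are simply null-checked and packed, in order, into one address list (optimizer_info.cc line 324). A Python stand-in for that pattern; the constructor's tail is truncated in the hits above, so the argument order after beta1 is assumed from the sparse Adam input layout:

class SparseAdamOptimInfo:
    """Illustrative stand-in for the C++ class, not its real interface."""
    def __init__(self, weight, m, v, beta1_power, beta2_power,
                 learning_rate, beta1, beta2, epsilon, grad, indices):
        inputs = [weight, m, v, beta1_power, beta2_power,
                  learning_rate, beta1, beta2, epsilon, grad, indices]
        for addr in inputs:
            if addr is None:  # mirrors MS_EXCEPTION_IF_NULL
                raise ValueError("null optimizer input")
        self.inputs_ = inputs
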
/third_party/mindspore/mindspore/ccsrc/transform/graph_ir/op_declare/

  nn_training_ops_declare.cc
     43: … {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)},
     52: … {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)},
    114: … {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(lr)}, {6, INPUT_DESC(beta1)},

/third_party/mindspore/tests/ut/python/ops/

  test_ops.py
    678: self.beta1_power = 0.9
    688: out = self.apply_ada_max(self.var, self.m, self.v, self.beta1_power, self.lr,

/third_party/mindspore/config/

  op_info.config
     54: …dex": 2, "name": "v", "param_type": "required"}, {"index": 3, "name": "beta1_power", "param_type":…
     55: …dex": 2, "name": "v", "param_type": "required"}, {"index": 3, "name": "beta1_power", "param_type":…
     73: …dex": 2, "name": "v", "param_type": "required"}, {"index": 3, "name": "beta1_power", "param_type":…
    168: …alse, "param_type": "required", "shape": "all"}, {"index": 3, "name": "beta1_power", "need_compile…
    169: …alse, "param_type": "required", "shape": "all"}, {"index": 3, "name": "beta1_power", "need_compile…