
Searched refs:beta1_power (Results 1 – 25 of 25) sorted by relevance

/third_party/mindspore/mindspore/nn/optim/
adam.py:93 …f _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, argument
102 op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
104 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
109 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
142 lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
155 beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, argument
161 … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
164 …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
171 def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment… argument
174 delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)
[all …]
lazyadam.py:34 …th_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, beta2_power, argument
43 op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
45 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
50 … success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
64 lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
81 …ne_number(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, beta2_power, argument
87 … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
90 …success = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, bet…
245 self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power")
267 self.beta1_power = self.beta1_power * self.beta1
[all …]
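
In the Python optimizers above, beta1_power plays two roles: lazyadam.py keeps it as a Parameter initialized to 1 and decays it once per step (self.beta1_power = self.beta1_power * self.beta1, so it holds beta1 ** t), and both files feed it into the bias-corrected learning rate lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power). A minimal NumPy sketch of that update, with illustrative names (adam_step is not a MindSpore API):

```python
import numpy as np

def adam_step(param, m, v, grad, lr, beta1, beta2, eps, beta1_power, beta2_power):
    """One bias-corrected Adam step (illustrative, not a MindSpore API).

    beta1_power and beta2_power are the running products beta1**t and
    beta2**t that lazyadam.py maintains as Parameters.
    """
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad * grad
    # Same expression as lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
    lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)
    param = param - lr_t * m / (np.sqrt(v) + eps)
    return param, m, v

# The powers start at 1 and are decayed once per step before the update,
# mirroring self.beta1_power = self.beta1_power * self.beta1 above.
beta1, beta2 = 0.9, 0.999
beta1_power, beta2_power = 1.0, 1.0
param, m, v = np.zeros(3), np.zeros(3), np.zeros(3)
for step in range(3):
    beta1_power *= beta1   # beta1 ** (step + 1)
    beta2_power *= beta2
    param, m, v = adam_step(param, m, v, np.ones(3), 1e-3, beta1, beta2, 1e-8,
                            beta1_power, beta2_power)
```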
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32_grad/
adam.cc:34 …at *m, float *v, const float *gradient, float *weight, float beta1, float beta2, float beta1_power, in DoAdam() argument
36 if ((1.f - beta1_power) <= 0.0f) { in DoAdam()
45 auto update_lr = learning_rate * std::sqrt(1.f - beta2_power) / (1.f - beta1_power); in DoAdam()
69 auto beta1_power = reinterpret_cast<float *>(in_tensors_.at(3)->MutableData())[0]; in Execute() local
87 return DoAdam(m, v, gradient, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate, in Execute()
140 auto beta1_power = reinterpret_cast<float *>(in_tensors_.at(3)->MutableData())[0]; in OptimizerStep() local
155 … ret = DoAdam(m, v, grad_sum_, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate, in OptimizerStep()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
adam_impl.cu:30 __global__ void ApplyAdamKernel(const size_t size, const T *gradient, const T *beta1_power, const T… in ApplyAdamKernel() argument
34 …t T new_learning_rate = learning_rate[0] * SqrtFunc(one - beta2_power[0]) / (one - beta1_power[0]); in ApplyAdamKernel()
72 void ApplyAdam(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, co… in ApplyAdam() argument
75 size, gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, variable, m, v); in ApplyAdam()
85 template void ApplyAdam<float>(const size_t size, const float *gradient, const float *beta1_power,
89 template void ApplyAdam<half>(const size_t size, const half *gradient, const half *beta1_power, con…
adam_impl.cuh:22 void ApplyAdam(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, co…
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/
adam_cpu_kernel.cc:36 float beta1_power = reinterpret_cast<float *>(inputs[BETA1_POWER]->addr)[kScalarIndex]; in LaunchAdam() local
44 if (beta1_power - ONE == 0) { in LaunchAdam()
47 T new_lr = static_cast<T>(lr * std::sqrt(ONE - beta2_power) / (ONE - beta1_power)); in LaunchAdam()
71 float beta1_power = reinterpret_cast<float *>(inputs[BETA1_POWER]->addr)[kScalarIndex]; in LaunchAdamNnacl() local
79 if (beta1_power - ONE == 0) { in LaunchAdamNnacl()
82 float new_lr = lr * std::sqrt(ONE - beta2_power) / (ONE - beta1_power); in LaunchAdamNnacl()
adam_delta_cpu_kernel.cc:116 auto beta1_power = reinterpret_cast<float *>(inputs[2]->addr)[0]; in Launch() local
117 if (beta1_power == 1) { in Launch()
132 lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power); in Launch()
sparse_apply_lazy_adam_cpu_kernel.cc:128 auto beta1_power = reinterpret_cast<float *>(inputs[3]->addr)[0]; in LaunchKernel() local
129 if (beta1_power == 1) { in LaunchKernel()
155 lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power); in LaunchKernel()
sparse_apply_adam_cpu_kernel.cc:148 auto beta1_power = reinterpret_cast<float *>(inputs[3]->addr)[0]; in LaunchKernel() local
149 if (beta1_power == 1) { in LaunchKernel()
177 lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power); in LaunchKernel()
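
Every CPU kernel above guards the division before computing the corrected rate: adam.cc in the lite runtime checks (1.f - beta1_power) <= 0.0f, and the kernels here check beta1_power - ONE == 0 or beta1_power == 1, because a power still at its initial value of 1 would make 1 - beta1_power a zero divisor. A sketch of the same guard in Python (bias_corrected_lr is an illustrative name):

```python
def bias_corrected_lr(lr, beta1_power, beta2_power):
    """Illustrative guard mirroring the checks in the kernels above.

    If the power was never decayed it is still 1, and 1 - beta1_power
    would be a zero divisor, so the kernels bail out first.
    """
    if 1.0 - beta1_power <= 0.0:
        raise ValueError("beta1_power must be < 1 before the first update")
    return lr * (1.0 - beta2_power) ** 0.5 / (1.0 - beta1_power)
```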
/third_party/mindspore/tests/st/fl/mobile/src/
adam.py:108 …f _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, argument
117 op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
119 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
124 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
160 lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
176 beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, argument
182 … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
185 …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
192 def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment… argument
195 delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)
/third_party/mindspore/tests/st/fl/albert/src/
adam.py:109 …f _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, argument
118 op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
120 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
125 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
161 lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
176 beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, argument
182 … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
185 …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
192 def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment… argument
195 delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)
/third_party/mindspore/tests/st/fl/hybrid_lenet/src/
adam.py:108 …f _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, argument
117 op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
119 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
124 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
159 lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
175 beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, argument
181 … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
184 …success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta…
191 def _run_off_load_opt(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, moment… argument
194 delat_param = opt(moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, eps, gradient)
/third_party/mindspore/tests/ut/python/optimizer/
test_auto_grad.py:292 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
293 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
303 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
304 out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
305 …gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
306 …gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
312 beta1_power = Tensor(np.array([0.9], dtype=np.float32))
321 grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
333 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
334 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
[all …]
/third_party/mindspore/tests/st/auto_monad/
test_auto_monad_expression.py:34 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
36 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
69 beta1_power = Tensor(0.9, ms.float32)
76 out, new_var, new_m, new_v = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
79 …out_pyn, new_var_pyn, new_m_pyn, new_v_pyn = net(beta1_power, beta2_power, lr, beta1, beta2, epsil…
test_effect_optimizer.py:34 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
35 self.apply_adam(self.var, self.m, self.v, beta1_power,
50 beta1_power = Tensor(0.9, mstype.float32)
58 beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
71 def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad): argument
73 beta1_power, lr, beta1, beta2, epsilon, grad)
87 beta1_power = Tensor(0.9, mstype.float32)
93 new_var, new_m, new_v = net(beta1_power, lr, beta1, beta2, epsilon, grad)
466 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices): argument
467 self.fused_sparse_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
[all …]
/third_party/mindspore/tests/st/ops/ascend/test_aicpu_ops/
test_fused_sparse_lazy_adam.py:25 beta1_power = 0.9 variable
41 return self.fused_sparse_lazy_adam(self.var, self.m, self.v, beta1_power, beta2_power,
test_fused_sparse_adam.py:25 beta1_power = 0.9 variable
41 …return self.fused_sparse_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2…
/third_party/mindspore/tests/st/ops/cpu/
test_sparse_apply_adam_op.py:25 beta1_power = 0.9 variable
42 …out = self.sparse_apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,…
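
The fused/sparse tests above exercise the lazy variant the sparse_apply_*_cpu_kernel.cc files implement: only the rows named by the gradient's indices are updated, with the same bias-corrected rate as the dense path. A minimal sketch, assuming the usual lazy-Adam semantics and pre-deduplicated indices (duplicate rows would need their gradients summed first); the function name is illustrative:

```python
import numpy as np

def lazy_adam_sparse_step(var, m, v, grad, indices, lr, beta1, beta2, eps,
                          beta1_power, beta2_power):
    """Illustrative lazy/sparse Adam step: only the rows in `indices` move.

    Assumes indices are already deduplicated; duplicates would need their
    gradient rows summed first, as the real kernels do.
    """
    lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)
    for row, g in zip(indices, grad):
        m[row] = beta1 * m[row] + (1 - beta1) * g
        v[row] = beta2 * v[row] + (1 - beta2) * g * g
        var[row] -= lr_t * m[row] / (np.sqrt(v[row]) + eps)
    return var, m, v

var, m, v = np.ones((4, 2)), np.zeros((4, 2)), np.zeros((4, 2))
grad = np.full((2, 2), 0.1)   # gradient rows for rows 0 and 3 only
lazy_adam_sparse_step(var, m, v, grad, indices=[0, 3], lr=1e-3,
                      beta1=0.9, beta2=0.999, eps=1e-8,
                      beta1_power=0.9, beta2_power=0.999)
```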
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/
adam_gpu_kernel.h:57 T *beta1_power = GetDeviceAddress<T>(inputs, 3); in Launch() local
64 …ApplyAdam(inputs[0]->size / sizeof(T), gradient, beta1_power, beta2_power, learning_rate, beta1, b… in Launch()
/third_party/mindspore/mindspore/ccsrc/ps/
optimizer_info_builder.cc:184 …AddressPtr beta1_power = GenInputAddrPtr<float>(kSparseAdam, "beta1_power", const_cast<float *>(va… in BuildInputs() local
185 MS_EXCEPTION_IF_NULL(beta1_power); in BuildInputs()
201 …return new SparseAdamOptimInfo(weight_addr, m, v, beta1_power, beta2_power, learning_rate, beta1, … in BuildInputs()
optimizer_info.cc:306 … const AddressPtr &beta1_power, const AddressPtr &beta2_power, in SparseAdamOptimInfo() argument
313 MS_EXCEPTION_IF_NULL(beta1_power); in SparseAdamOptimInfo()
324 inputs_.push_back(beta1_power); in SparseAdamOptimInfo()
optimizer_info.h:102 …(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v, const AddressPtr &beta1_power,
/third_party/mindspore/mindspore/ccsrc/transform/graph_ir/op_declare/
nn_training_ops_declare.cc:43 … {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)},
52 … {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)},
114 … {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(lr)}, {6, INPUT_DESC(beta1)},
/third_party/mindspore/tests/ut/python/ops/
test_ops.py:678 self.beta1_power = 0.9
688 out = self.apply_ada_max(self.var, self.m, self.v, self.beta1_power, self.lr,
/third_party/mindspore/config/
op_info.config:54 …dex": 2, "name": "v", "param_type": "required"}, {"index": 3, "name": "beta1_power", "param_type":…
55 …dex": 2, "name": "v", "param_type": "required"}, {"index": 3, "name": "beta1_power", "param_type":…
73 …dex": 2, "name": "v", "param_type": "required"}, {"index": 3, "name": "beta1_power", "param_type":…
168 …alse, "param_type": "required", "shape": "all"}, {"index": 3, "name": "beta1_power", "need_compile…
169 …alse, "param_type": "required", "shape": "all"}, {"index": 3, "name": "beta1_power", "need_compile…
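
The config entries pin the same 0-based input layout the kernels read: var, m and v occupy indices 0-2 and beta1_power sits at index 3, which is why the CPU kernels read inputs[3], the lite runtime reads in_tensors_.at(3), and adam_gpu_kernel.h calls GetDeviceAddress<T>(inputs, 3) (nn_training_ops_declare.cc counts the same inputs from 1, putting beta1_power at 4). A sketch of that layout; the indices past beta1_power are inferred from the tests' call order and are assumptions:

```python
# 0-based input layout of the fused Adam op as the kernels above read it.
# beta1_power at index 3 is confirmed by inputs[3], in_tensors_.at(3) and
# "index": 3 in op_info.config; the entries after it are inferred from the
# call order in the test files and should be treated as assumptions.
ADAM_INPUT_INDEX = {
    "var": 0, "m": 1, "v": 2,
    "beta1_power": 3,
    "beta2_power": 4, "lr": 5, "beta1": 6,
    "beta2": 7, "epsilon": 8, "gradient": 9,
}
```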