
Searched refs:beta2 (Results 1 – 25 of 71) sorted by relevance

/third_party/mindspore/mindspore/nn/optim/
adam.py:37 def _update_run_op(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flag, optim_fi… argument
73 next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32)
74 - beta2, op_square(gradient_fp32))
94 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable): argument
103 op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
104 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
109 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
118 success = F.depend(success, F.assign(v, op_mul(beta2, v)))
129 op_mul(F.tuple_to_array((1.0,)) - beta2, op_square(grad_value)))
155 beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, argument
[all …]
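
Every adam.py hit above is a piece of the same computation: the Adam second-moment accumulator. A minimal NumPy sketch of that update (names are illustrative, not the MindSpore API):

    import numpy as np

    def adam_second_moment(v, gradient, beta2=0.999):
        # v_{t+1} = beta2 * v_t + (1 - beta2) * g_t**2 -- the math behind the
        # op_mul(beta2, v_fp32) + op_mul(1 - beta2, op_square(gradient_fp32)) lines above.
        return beta2 * v + (1.0 - beta2) * np.square(gradient)
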
lazyadam.py:35 beta1, beta2, eps, lr, gradient, params, m, v, ps_parameter, cache_enable):
44 op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
45 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
50 … success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
62 next_v = v_slice * beta2 + values * values * (1 - beta2)
82 … beta1, beta2, eps, lr, gradient, params, moment1, moment2, ps_parameter, cache_enable):
87 … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
90 …ccess = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
95 def _check_param_value(beta1, beta2, eps, weight_decay, prim_name): argument
98 validator.check_value_type("beta2", beta2, [float], prim_name)
[all …]
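
The lazy variant touches only the rows addressed by a sparse gradient (the "next_v = v_slice * beta2 + values * values * (1 - beta2)" hit at line 62). A sketch of that idea, assuming a row-indexed accumulator:

    import numpy as np

    def lazy_adam_second_moment(v, indices, values, beta2=0.999):
        # Only the rows named by the sparse gradient are updated; untouched
        # rows keep their stale accumulator, which is what makes it "lazy".
        v = v.copy()
        v[indices] = v[indices] * beta2 + values * values * (1.0 - beta2)
        return v
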
lamb.py:39 def _update_run_op(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, decay_f… argument
82 …next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta2, op_square(gradie…
87 op_pow(beta2, op_cast(global_step + num_one, mstype.float32)))
121 def _update_run_op_ascend(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, … argument
155 beta1, 1.0 - beta1, beta2, 1.0 - beta2, eps,
164 def _check_param_value(beta1, beta2, eps, prim_name): argument
166 validator.check_value_type("beta2", beta2, [float], prim_name)
169 validator.check_float_range(beta2, 0.0, 1.0, Rel.INC_NEITHER, "beta2", prim_name)
295 def __init__(self, params, learning_rate, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument
297 _check_param_value(beta1, beta2, eps, self.cls_name)
[all …]
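
lamb.py additionally bias-corrects the accumulator with beta2 raised to the step count (the op_pow(beta2, global_step + num_one) hit at line 87). A sketch of the correction, assuming the usual Adam/LAMB convention:

    import numpy as np

    def corrected_second_moment(v, gradient, global_step, beta2=0.999):
        # Update, then divide out the zero-initialization bias:
        # v_hat = v / (1 - beta2**(t + 1)).
        next_v = beta2 * v + (1.0 - beta2) * np.square(gradient)
        v_hat = next_v / (1.0 - beta2 ** (global_step + 1))
        return next_v, v_hat
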
/third_party/mindspore/tests/st/fl/mobile/src/
adam.py:33 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument
40 next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient)
42 next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient)
48 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument
87 next_v = op_mul(beta2, v_fp32) + op_select(cond, v_fp32,\
88 … op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient_fp32)))
109 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable): argument
118 op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
119 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
124 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
[all …]
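
This federated-learning adam.py (and its two identical copies below) adds an overflow guard: an op_select chooses whether the squared-gradient term enters the accumulator, so a loss-scale overflow does not pollute v. A simplified sketch of the masking idea (the exact select arms in the quoted code differ):

    import numpy as np

    def guarded_second_moment(v, gradient, overflow, beta2=0.999):
        # When overflow is True the squared-gradient contribution is masked
        # out and only the decayed accumulator survives.
        update = np.where(overflow, 0.0, (1.0 - beta2) * np.square(gradient))
        return beta2 * v + update
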
/third_party/mindspore/tests/st/fl/albert/src/
adam.py:34 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument
41 next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient)
43 next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient)
49 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument
88 next_v = op_mul(beta2, v_fp32) + op_select(cond, v_fp32,\
89 … op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient_fp32)))
110 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable): argument
119 op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
120 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
125 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
[all …]
/third_party/mindspore/tests/st/fl/hybrid_lenet/src/
adam.py:33 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument
40 next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient)
42 next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient)
48 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument
87 next_v = op_mul(beta2, v_fp32) + op_select(cond, v_fp32,\
88 … op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient_fp32)))
109 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable): argument
118 op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
119 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
124 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
adam_impl.cu:31 … const T *learning_rate, const T *beta1, const T *beta2, const T *epsilon, T *variable, in ApplyAdamKernel()
38 v[i] += (gradient[i] * gradient[i] - v[i]) * (one - beta2[0]); in ApplyAdamKernel()
45 … const float *beta1, const float *beta2, const float *epsilon, const float *decay, in AdamWeightDecayKernel() argument
49 T next_v = beta2[0] * v[i] + (1 - beta2[0]) * gradient[i] * gradient[i]; in AdamWeightDecayKernel()
59 … const float *beta1, const float *beta2, const float *epsilon, const float *decay, in AdamWeightDecayKernel() argument
63 …half next_v = __float2half(beta2[0]) * v[i] + __float2half(1 - beta2[0]) * gradient[i] * gradient[… in AdamWeightDecayKernel()
73 …const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_strea… in ApplyAdam()
75 size, gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, variable, m, v); in ApplyAdam()
79 … const float *beta2, const float *epsilon, const float *decay, T *variable, T *m, T *v, in AdamWeightDecayOp() argument
81 …nel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, gradient, learning_rate, beta1, beta2, in AdamWeightDecayOp()
[all …]
adam_weight_decay_impl.cu:22 … const float *one_sub_beta1, const float *beta2, const float *one_sub_beta2, in AdamWeightDecayKernel() argument
27 float next_v = beta2[0] * v[i] + one_sub_beta2[0] * gradient[i] * gradient[i]; in AdamWeightDecayKernel()
40 … const float *beta2, const float *one_sub_beta2, const float *epsilon, const float *lr, in AdamWeightDecay() argument
43 …element_num_, need_decay, beta1, one_sub_beta1, beta2, one_sub_beta2, epsilon, lr, weight_decay, m… in AdamWeightDecay()
48 … const float *one_sub_beta1, const float *beta2, const float *one_sub_beta2,
adam_impl.cuh:23 …const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_strea…
26 … const float *beta2, const float *epsilon, const float *decay, T *variable, T *m, T *v,
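
The CUDA kernels prefer the in-place form v[i] += (g*g - v[i]) * (1 - beta2) (adam_impl.cu line 38), which is algebraically the same accumulator with one multiply fewer per element:

    import numpy as np

    def second_moment_inplace(v, gradient, beta2=0.999):
        # v + (g*g - v) * (1 - beta2)  ==  beta2*v + (1 - beta2)*g*g.
        v += (np.square(gradient) - v) * (1.0 - beta2)
        return v
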
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/
adam_fp32.h:35 int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, con…
37 int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsil…
39 int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float e…
41 size_t FusedCastAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float …
43 size_t FusedCastAdamFp16(int16_t *var16, float *m, float *v, float lr, float beta1, float beta2, fl…
adam_fp32.c:20 int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, con… in AdamFp32() argument
26 __m256 coeff2_r = _mm256_set1_ps(1 - beta2); in AdamFp32()
79 v[c1] += (gradient[c1] * gradient[c1] - v[c1]) * (1 - beta2); in AdamFp32()
89 int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsil… in AdamDeltaFp32() argument
95 __m256 coeff2_r = _mm256_set1_ps(1.0f - beta2); in AdamDeltaFp32()
97 __m256 beta2_r = _mm256_set1_ps(beta2); in AdamDeltaFp32()
144 v[c1] *= beta2; in AdamDeltaFp32()
145 v[c1] += (1 - beta2) * gradient[c1] * gradient[c1]; in AdamDeltaFp32()
155 int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float e… in AdamWeightDecayFp32() argument
159 const float beta2_minus = 1 - beta2; in AdamWeightDecayFp32()
[all …]
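
The nnacl kernels hoist the loop-invariant 1 - beta2 into a broadcast register before the element loop (the _mm256_set1_ps(1 - beta2) hits). The scalar shape of that pattern:

    def second_moment_hoisted(v, gradient, beta2=0.999):
        # coeff2 plays the role of the broadcast _mm256_set1_ps(1 - beta2) register.
        coeff2 = 1.0 - beta2
        for i in range(len(v)):
            v[i] = beta2 * v[i] + coeff2 * gradient[i] * gradient[i]
        return v
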
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/
adam_delta_cpu_kernel.cc:35 void AdamDeltaCPUKernel::LaunchAdamDelta(T *delta, T *m, T *v, float lr, float beta1, float beta2, … in LaunchAdamDelta() argument
39 task = [this, delta, m, v, lr, beta1, beta2, epsilon, gradient](size_t start, size_t end) { in LaunchAdamDelta()
40 … (void)AdamDeltaFp32(delta, m, v, lr, beta1, beta2, epsilon, gradient, start, end, use_nesterov_); in LaunchAdamDelta()
43 task = [this, delta, m, v, lr, beta1, beta2, epsilon, gradient](size_t start, size_t end) { in LaunchAdamDelta()
47 v[c1] *= beta2; in LaunchAdamDelta()
48 v[c1] += (1 - beta2) * gradient[c1] * gradient[c1]; in LaunchAdamDelta()
123 auto beta2 = reinterpret_cast<float *>(inputs[6]->addr)[0]; in Launch() local
135 LaunchAdamDelta<float>(delta, m, v, lr, beta1, beta2, epsilon, grad, lens); in Launch()
fused_cast_adam_weight_decay_cpu_kernel.cc:32 auto beta2 = reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]; in LaunchFusedCastAdamFp32() local
37 const auto beta2_minus = 1 - beta2; in LaunchFusedCastAdamFp32()
44 …size_t i = FusedCastAdamFp32(var, m, v, lr, beta1, beta2, epsilon, decay, reinterpret_cast<int16_t… in LaunchFusedCastAdamFp32()
66 auto beta2 = reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]; in LaunchFusedCastAdamFp16() local
71 const auto beta2_minus = 1 - beta2; in LaunchFusedCastAdamFp16()
78 …size_t i = FusedCastAdamFp16(reinterpret_cast<int16_t *>(var16), m, v, lr, beta1, beta2, epsilon, … in LaunchFusedCastAdamFp16()
adam_cpu_kernel.cc:40 T beta2 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]); in LaunchAdam() local
50 …auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t en… in LaunchAdam()
54 v[i] += (gradient[i] * gradient[i] - v[i]) * (one - beta2); in LaunchAdam()
75 float beta2 = reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]; in LaunchAdamNnacl() local
86 …auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t en… in LaunchAdamNnacl()
87 … int ret = AdamFp32(var, m, v, new_lr, beta1, beta2, epsilon, gradient, start, end, use_nesterov_); in LaunchAdamNnacl()
sparse_apply_adam_cpu_kernel.cc:34 const auto beta2 = input_params->beta2_; in ComputeAdam() local
49 v[j] += (1 - beta2) * summed_grad * summed_grad; in ComputeAdam()
63 const auto beta2 = input_params->beta2_; in ComputeMomentum() local
66 v[i] *= beta2; in ComputeMomentum()
155 auto beta2 = reinterpret_cast<float *>(inputs[7]->addr)[0]; in LaunchKernel() local
183 input_params.beta2_ = beta2; in LaunchKernel()
sparse_apply_lazy_adam_cpu_kernel.cc:35 const auto beta2 = input_params->beta2_; in ComputeLazyAdam() local
51 v[j] = beta2 * v[j] + (1 - beta2) * summed_grad * summed_grad; in ComputeLazyAdam()
135 auto beta2 = reinterpret_cast<float *>(inputs[7]->addr)[0]; in LaunchKernel() local
162 input_params.beta2_ = beta2; in LaunchKernel()
adam_weight_decay_cpu_kernel.cc:43 T beta2 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]); in LaunchAdamWeightDecay() local
49 const T beta2_minus = one - beta2; in LaunchAdamWeightDecay()
74 auto beta2 = reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]; in LaunchAdamWeightDecayNnacl() local
83 … int ret = AdamWeightDecayFp32(var, m, v, lr, beta1, beta2, epsilon, decay, gradient, start, end); in LaunchAdamWeightDecayNnacl()
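
sparse_apply_adam splits its sparse update into two phases: ComputeMomentum decays every row by beta2, then ComputeAdam adds (1 - beta2) * g * g at the touched rows only; contrast with the lazy kernels above, which skip the global decay. A sketch of the two-phase pattern:

    import numpy as np

    def two_phase_sparse_second_moment(v, indices, summed_grad, beta2=0.999):
        v *= beta2                                               # phase 1: decay all rows
        v[indices] += (1.0 - beta2) * summed_grad * summed_grad  # phase 2: touched rows only
        return v
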
/third_party/mindspore/tests/st/ops/graph_kernel/
test_fused_adam.py:47 …def construct(self, beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tens… argument
56 next_v = self.op_mul(beta2, v_fp32) + self.op_mul(self.op_cast(one_sub_beta_2,
91 …def construct(self, beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tens… argument
102 next_v = self.op_mul(beta2, v_fp32) + self.op_mul(self.op_cast(one_sub_beta_2,
119 def CalFusedAdam(beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tensor, … argument
122 v_expect = beta2 * v + one_sub_beta_2 * gradient * gradient
133 beta2 = np.array([0.999]).astype(np.float32)
146 …_ = opt(Tensor(beta1), Tensor(beta2), Tensor(one_sub_beta_1), Tensor(one_sub_beta_2), Tensor(gradi…
149 beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tensor, lr,
162 beta2 = np.array([0.999]).astype(np.float32)
[all …]
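
The graph-kernel test validates the fused op against a plain reference (CalFusedAdam). A minimal version of that check, with check_v as a hypothetical helper name:

    import numpy as np

    def check_v(next_v, v, gradient, beta2, one_sub_beta_2, rtol=1e-6):
        # Reference value in the spirit of CalFusedAdam:
        # v_expect = beta2 * v + one_sub_beta_2 * gradient * gradient.
        v_expect = beta2 * v + one_sub_beta_2 * gradient * gradient
        return np.allclose(next_v, v_expect, rtol=rtol)
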
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32_grad/
adam.cc:34 …at *m, float *v, const float *gradient, float *weight, float beta1, float beta2, float beta1_power, in DoAdam() argument
47 const float one_minus_beta2 = 1.f - beta2; in DoAdam()
73 auto beta2 = reinterpret_cast<float *>(in_tensors_.at(7)->MutableData())[0]; in Execute() local
87 return DoAdam(m, v, gradient, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate, in Execute()
144 auto beta2 = reinterpret_cast<float *>(in_tensors_.at(7)->MutableData())[0]; in OptimizerStep() local
155 … ret = DoAdam(m, v, grad_sum_, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate, in OptimizerStep()
/third_party/mindspore/tests/st/ops/gpu/
test_adam_fusion.py:46 def construct(self, beta1, beta2, gradient, eps, weight_decay_tensor, lr): argument
54 …next_v = self.op_mul(beta2, v_fp32) + self.op_mul(self.op_cast(F.tuple_to_array((1.0,)), mstype.fl…
55 beta2, self.op_square(gradient_fp32))
73 beta2 = Tensor(np.array([0.999]).astype(np.float32))
80 _ = opt(beta1, beta2, gradient, eps, weight_decay_tensor, lr)
/third_party/mindspore/tests/ut/python/optimizer/
test_auto_grad.py:292 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
293 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
303 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
304 out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
305 …gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
306 …gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
316 beta2 = Tensor(np.array([0.999], dtype=np.float32))
321 grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
333 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
334 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
[all …]
/third_party/mindspore/tests/ut/python/ir/
test_row_tensor.py:161 def _update_run_op_for_map_row_tensor(beta1, beta2, eps, lr, weight_decay_tensor, param, argument
167 def _update_run_op_for_map_tensor(beta1, beta2, eps, lr, weight_decay_tensor, param, argument
183 next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32)
184 - beta2, op_square(gradient_fp32))
199 def _check_param_value(beta1, beta2, eps, weight_decay, prim_name): argument
202 validator.check_value_type("beta2", beta2, [float], prim_name)
206 validator.check_float_range(beta2, 0.0, 1.0, Rel.INC_NEITHER, "beta2", prim_name)
212 … def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0, argument
217 _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name)
219 self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
[all …]
/third_party/mindspore/tests/st/auto_monad/
test_auto_monad_expression.py:34 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
36 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
73 beta2 = Tensor(0.999, ms.float32)
76 out, new_var, new_m, new_v = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
79 …out_pyn, new_var_pyn, new_m_pyn, new_v_pyn = net(beta1_power, beta2_power, lr, beta1, beta2, epsil…
/third_party/boost/boost/numeric/odeint/stepper/detail/
pid_step_adjuster.hpp:42 const double beta2; member
55 :beta1(b1), beta2(b2), beta3(b3), alpha1(a1), alpha2(a2), in pid_op()
66 adapted_pow(abs(t3), -beta2/(m_steps + 1)) * in operator ()()
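
Note the odd one out: in boost's odeint this beta2 is a gain of a PID step-size controller, unrelated to Adam. A hedged sketch of the controller factor suggested by the adapted_pow(abs(t3), -beta2/(m_steps + 1)) hit (an illustration, not the boost API):

    def pid_step_factor(e1, e2, e3, beta1, beta2, beta3, m_steps):
        # Each recent error ratio is raised to -beta_k / (m_steps + 1);
        # the product scales the next step size.
        k = m_steps + 1
        return (abs(e1) ** (-beta1 / k) *
                abs(e2) ** (-beta2 / k) *
                abs(e3) ** (-beta3 / k))
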
/third_party/mindspore/tests/st/ops/ascend/test_aicpu_ops/
test_fused_sparse_lazy_adam.py:29 beta2 = 0.999 variable
42 lr, beta1, beta2, epsilon, grad, indices)
