
Searched refs:beta1 (Results 1 – 25 of 76) sorted by relevance


/third_party/mindspore/tests/st/fl/mobile/src/
adam.py
33 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument
40 next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient)
42 next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient)
48 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument
84 next_m = op_mul(beta1, m_fp32) + op_select(cond, m_fp32,\
85 op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32))
109 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):
117 op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
119 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
124 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
[all …]
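
Note: the matches above (and the near-identical copies in the albert and hybrid_lenet directories below) all center on the same first-moment update, next_m = beta1 * m + (1 - beta1) * gradient, followed by a fused adam call with an optional weight decay. A minimal NumPy sketch of that step, assuming standard AdamWeightDecay semantics and hypothetical names rather than this file's exact code:

    import numpy as np

    def adam_weight_decay_step(param, m, v, grad, lr, beta1=0.9, beta2=0.999,
                               eps=1e-6, weight_decay=0.0):
        # First- and second-moment updates that the beta1/beta2 matches refer to.
        m = beta1 * m + (1.0 - beta1) * grad
        v = beta2 * v + (1.0 - beta2) * grad * grad
        # Decoupled weight decay folded into the parameter update (no bias correction).
        update = m / (np.sqrt(v) + eps) + weight_decay * param
        return param - lr * update, m, v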
/third_party/mindspore/tests/st/fl/albert/src/
adam.py
34 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument
41 next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient)
43 next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient)
49 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument
85 next_m = op_mul(beta1, m_fp32) + op_select(cond, m_fp32,\
86 op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32))
110 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):
118 op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
120 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
125 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
[all …]
/third_party/mindspore/tests/st/fl/hybrid_lenet/src/
adam.py
33 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument
40 next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient)
42 next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient)
48 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument
84 next_m = op_mul(beta1, m_fp32) + op_select(cond, m_fp32,\
85 op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32))
109 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):
117 op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
119 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
124 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
[all …]
/third_party/mindspore/mindspore/nn/optim/
adam.py
37 def _update_run_op(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flag, optim_fi… argument
70 next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32)
71 - beta1, gradient_fp32)
94 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):
102 op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
104 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
109 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
117 success = F.depend(success, F.assign(m, op_mul(beta1, m)))
125 op_mul(F.tuple_to_array((1.0,)) - beta1, grad_value))
133 F.assign(m, op_mul(beta1, next_m))
[all …]
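
Note: unlike the test copies above, the nn/optim matches also thread beta1_power and beta2_power (the accumulated powers of beta1 and beta2) through the parameter-server push/pull path and the sparse kernel. One common way those factors are used is to fold bias correction into the learning rate; a hedged sketch with hypothetical names:

    def adam_step_bias_corrected(param, m, v, grad, lr, beta1_power, beta2_power,
                                 beta1=0.9, beta2=0.999, eps=1e-8):
        # beta1_power / beta2_power stand for beta1**t and beta2**t, which the
        # matched kernels receive as separate scalar inputs.
        m = beta1 * m + (1.0 - beta1) * grad
        v = beta2 * v + (1.0 - beta2) * grad * grad
        corrected_lr = lr * (1.0 - beta2_power) ** 0.5 / (1.0 - beta1_power)
        return param - corrected_lr * m / (v ** 0.5 + eps), m, v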
lazyadam.py
35 beta1, beta2, eps, lr, gradient, params, m, v, ps_parameter, cache_enable):
43 op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
45 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
50 … success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
61 next_m = m_slice * beta1 + values * (1 - beta1)
67 m_temp = beta1 * next_m + values * (1 - beta1)
82 … beta1, beta2, eps, lr, gradient, params, moment1, moment2, ps_parameter, cache_enable):
87 … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
90 …ccess = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
95 def _check_param_value(beta1, beta2, eps, weight_decay, prim_name): argument
[all …]
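
Note: the lazyadam.py matches (snippet lines 61 and 67) update the moments only on the rows actually present in a sparse gradient. A rough NumPy sketch of that lazy update, assuming unique row indices (hypothetical helper, not the file's code):

    import numpy as np

    def lazy_adam_sparse_update(param, m, v, indices, values, lr,
                                beta1=0.9, beta2=0.999, eps=1e-8):
        # Only the sliced rows are touched, mirroring the m_slice / values pattern above.
        m[indices] = beta1 * m[indices] + (1.0 - beta1) * values
        v[indices] = beta2 * v[indices] + (1.0 - beta2) * values * values
        param[indices] -= lr * m[indices] / (np.sqrt(v[indices]) + eps)
        return param, m, v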
lamb.py
39 def _update_run_op(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, decay_f… argument
80 … next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta1, gradient_fp32)
85 - op_pow(beta1, op_cast(global_step + num_one, mstype.float32)))
121 def _update_run_op_ascend(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, … argument
155 beta1, 1.0 - beta1, beta2, 1.0 - beta2, eps,
164 def _check_param_value(beta1, beta2, eps, prim_name): argument
165 validator.check_value_type("beta1", beta1, [float], prim_name)
168 validator.check_float_range(beta1, 0.0, 1.0, Rel.INC_NEITHER, "beta1", prim_name)
295 def __init__(self, params, learning_rate, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument
297 _check_param_value(beta1, beta2, eps, self.cls_name)
[all …]
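
Note: in lamb.py the matched lines 80 and 85 show the moment update followed by bias correction with beta1 raised to the current step, and line 168 constrains beta1 to the open interval (0, 1). A sketch of just the bias-correction piece (the v_hat line is assumed by analogy; the full LAMB step also applies a layer-wise trust ratio, which the truncated matches only hint at):

    def lamb_bias_corrected_moments(m, v, grad, global_step, beta1=0.9, beta2=0.999):
        # Moment updates plus the (1 - beta1**(step + 1)) correction visible at lamb.py:85.
        next_m = beta1 * m + (1.0 - beta1) * grad
        next_v = beta2 * v + (1.0 - beta2) * grad * grad
        m_hat = next_m / (1.0 - beta1 ** (global_step + 1))
        v_hat = next_v / (1.0 - beta2 ** (global_step + 1))
        return m_hat, v_hat, next_m, next_v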
adafactor.py
68 def _run_opt_with_one_number(eps, clip_threshold, decay_rate, beta1, argument
140 exp_avg_update = P.Add()(P.Mul()(exp_avg_update, beta1), update * (1 - beta1))
294 beta1=0.9, argument
310 if beta1 is None:
311 beta1 = 0.0
333 validator.check_value_type("beta1", beta1, [int, float], self.cls_name)
334 validator.check_non_negative_float(float(beta1), "beta1", self.cls_name)
338 self.beta1 = trans_to_tensor(beta1)
348 self.init_ada_factor_state(beta1)
352 def init_ada_factor_state(self, beta1): argument
[all …]
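
Note: in adafactor.py, beta1 is optional: None is normalized to 0.0 (lines 310-311), it is validated as a non-negative number, and the first-moment accumulator only smooths the update when beta1 is positive (line 140). A small sketch of that handling, with hypothetical names:

    def adafactor_apply_beta1(exp_avg, update, beta1):
        # beta1=None behaves like 0.0, which effectively disables first-moment smoothing.
        beta1 = 0.0 if beta1 is None else float(beta1)
        if beta1 > 0.0:
            exp_avg = exp_avg * beta1 + update * (1.0 - beta1)
            update = exp_avg
        return exp_avg, update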
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
adam_impl.cu
31 … const T *learning_rate, const T *beta1, const T *beta2, const T *epsilon, T *variable, in ApplyAdamKernel()
37 m[i] += (gradient[i] - m[i]) * (one - beta1[0]); in ApplyAdamKernel()
45 … const float *beta1, const float *beta2, const float *epsilon, const float *decay, in AdamWeightDecayKernel() argument
48 T next_m = beta1[0] * m[i] + (1 - beta1[0]) * gradient[i]; in AdamWeightDecayKernel()
59 … const float *beta1, const float *beta2, const float *epsilon, const float *decay, in AdamWeightDecayKernel() argument
62 half next_m = __float2half(beta1[0]) * m[i] + __float2half(1 - beta1[0]) * gradient[i]; in AdamWeightDecayKernel()
73 …const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_strea… in ApplyAdam()
75 size, gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, variable, m, v); in ApplyAdam()
78 …WeightDecayOp(const size_t size, const T *gradient, const float *learning_rate, const float *beta1, in AdamWeightDecayOp() argument
81 …nel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, gradient, learning_rate, beta1, beta2, in AdamWeightDecayOp()
[all …]
adam_weight_decay_impl.cu
21 …bal__ void AdamWeightDecayKernel(const int element_num_, const bool need_decay, const float *beta1, in AdamWeightDecayKernel() argument
26 float next_m = beta1[0] * m[i] + one_sub_beta1[0] * gradient[i]; in AdamWeightDecayKernel()
39 void AdamWeightDecay(const int &element_num_, const bool &need_decay, const float *beta1, const flo… in AdamWeightDecay() argument
43 …element_num_, need_decay, beta1, one_sub_beta1, beta2, one_sub_beta2, epsilon, lr, weight_decay, m… in AdamWeightDecay()
47 template void AdamWeightDecay(const int &element_num_, const bool &need_decay, const float *beta1,
adam_impl.cuh
23 …const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_strea…
25 …WeightDecayOp(const size_t size, const T *gradient, const float *learning_rate, const float *beta1,
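
Note: the CUDA matches use two algebraically equivalent forms of the first-moment update: ApplyAdamKernel writes m += (g - m) * (1 - beta1) (adam_impl.cu:37), while AdamWeightDecayKernel writes next_m = beta1 * m + (1 - beta1) * g (adam_impl.cu:48), with the half-precision overload converting beta1 via __float2half. A quick NumPy check of the equivalence:

    import numpy as np

    m, g, beta1 = np.random.rand(8), np.random.rand(8), 0.9
    # m + (g - m)*(1 - beta1) == beta1*m + (1 - beta1)*g, up to rounding.
    assert np.allclose(m + (g - m) * (1.0 - beta1), beta1 * m + (1.0 - beta1) * g)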
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/
adam_fp32.c
20 int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, con… in AdamFp32() argument
25 __m256 coeff1_r = _mm256_set1_ps(1 - beta1); in AdamFp32()
27 __m256 beta1_r = _mm256_set1_ps(beta1); in AdamFp32()
78 m[c1] += (gradient[c1] - m[c1]) * (1 - beta1); in AdamFp32()
81 var[c1] -= lr * (m[c1] * beta1 + (1 - beta1) * gradient[c1]) / (sqrt(v[c1]) + epsilon); in AdamFp32()
89 int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsil… in AdamDeltaFp32() argument
94 __m256 coeff1_r = _mm256_set1_ps(1.0f - beta1); in AdamDeltaFp32()
96 __m256 beta1_r = _mm256_set1_ps(beta1); in AdamDeltaFp32()
142 m[c1] *= beta1; in AdamDeltaFp32()
143 m[c1] += (1 - beta1) * gradient[c1]; in AdamDeltaFp32()
[all …]
adam_fp32.h
35 int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, con…
37 int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsil…
39 int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float e…
41 size_t FusedCastAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float …
43 size_t FusedCastAdamFp16(int16_t *var16, float *m, float *v, float lr, float beta1, float beta2, fl…
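
Note: the nnacl AdamFp32 path broadcasts beta1 and 1 - beta1 into 8-lane AVX registers for the main loop and finishes with a scalar tail; the tail (source lines 78 and 81 above) also shows the parameter step that blends the fresh gradient back in through beta1, which looks like the Nesterov-enabled branch. A scalar NumPy equivalent as a hedged sketch (the v update is assumed by analogy with the m update):

    import numpy as np

    def adam_step_nesterov(var, m, v, grad, lr, beta1, beta2, eps):
        # Same arithmetic as the scalar tail loop of AdamFp32, vectorized over the array.
        m += (grad - m) * (1.0 - beta1)
        v += (grad * grad - v) * (1.0 - beta2)
        var -= lr * (m * beta1 + (1.0 - beta1) * grad) / (np.sqrt(v) + eps)
        return var, m, v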
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/
adam_delta_cpu_kernel.cc
35 void AdamDeltaCPUKernel::LaunchAdamDelta(T *delta, T *m, T *v, float lr, float beta1, float beta2, … in LaunchAdamDelta() argument
39 task = [this, delta, m, v, lr, beta1, beta2, epsilon, gradient](size_t start, size_t end) { in LaunchAdamDelta()
40 … (void)AdamDeltaFp32(delta, m, v, lr, beta1, beta2, epsilon, gradient, start, end, use_nesterov_); in LaunchAdamDelta()
43 task = [this, delta, m, v, lr, beta1, beta2, epsilon, gradient](size_t start, size_t end) { in LaunchAdamDelta()
45 m[c1] *= beta1; in LaunchAdamDelta()
46 m[c1] += (1 - beta1) * gradient[c1]; in LaunchAdamDelta()
50 … delta[c1] = -lr * (m[c1] * beta1 + (1 - beta1) * gradient[c1]) / (std::sqrt(v[c1]) + epsilon); in LaunchAdamDelta()
122 auto beta1 = reinterpret_cast<float *>(inputs[5]->addr)[0]; in Launch() local
135 LaunchAdamDelta<float>(delta, m, v, lr, beta1, beta2, epsilon, grad, lens); in Launch()
adam_cpu_kernel.cc
39 T beta1 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]); in LaunchAdam() local
50 …auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t en… in LaunchAdam()
53 m[i] += (gradient[i] - m[i]) * (one - beta1); in LaunchAdam()
57 var[i] -= new_lr * (m[i] * beta1 + (one - beta1) * gradient[i]) / (sqrt_v + epsilon); in LaunchAdam()
74 float beta1 = reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]; in LaunchAdamNnacl() local
86 …auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t en… in LaunchAdamNnacl()
87 … int ret = AdamFp32(var, m, v, new_lr, beta1, beta2, epsilon, gradient, start, end, use_nesterov_); in LaunchAdamNnacl()
sparse_apply_adam_cpu_kernel.cc
33 const auto beta1 = input_params->beta1_; in ComputeAdam() local
48 m[j] += (1 - beta1) * summed_grad; in ComputeAdam()
51 m_t[j] = m[j] * beta1 + (1 - beta1) * summed_grad; in ComputeAdam()
62 const auto beta1 = input_params->beta1_; in ComputeMomentum() local
65 m[i] *= beta1; in ComputeMomentum()
154 auto beta1 = reinterpret_cast<float *>(inputs[6]->addr)[0]; in LaunchKernel() local
182 input_params.beta1_ = beta1; in LaunchKernel()
sparse_apply_lazy_adam_cpu_kernel.cc
34 const auto beta1 = input_params->beta1_; in ComputeLazyAdam() local
50 m[j] = beta1 * m[j] + (1 - beta1) * summed_grad; in ComputeLazyAdam()
53 var[j] -= lr * (m[j] * beta1 + (1 - beta1) * summed_grad) / (std::sqrt(v[j]) + epsilon); in ComputeLazyAdam()
134 auto beta1 = reinterpret_cast<float *>(inputs[6]->addr)[0]; in LaunchKernel() local
161 input_params.beta1_ = beta1; in LaunchKernel()
fused_cast_adam_weight_decay_cpu_kernel.cc
31 auto beta1 = reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]; in LaunchFusedCastAdamFp32() local
36 const auto beta1_minus = 1 - beta1; in LaunchFusedCastAdamFp32()
44 …size_t i = FusedCastAdamFp32(var, m, v, lr, beta1, beta2, epsilon, decay, reinterpret_cast<int16_t… in LaunchFusedCastAdamFp32()
65 auto beta1 = reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]; in LaunchFusedCastAdamFp16() local
70 const auto beta1_minus = 1 - beta1; in LaunchFusedCastAdamFp16()
78 …size_t i = FusedCastAdamFp16(reinterpret_cast<int16_t *>(var16), m, v, lr, beta1, beta2, epsilon, … in LaunchFusedCastAdamFp16()
adam_weight_decay_cpu_kernel.cc
42 T beta1 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]); in LaunchAdamWeightDecay() local
48 const T beta1_minus = one - beta1; in LaunchAdamWeightDecay()
73 auto beta1 = reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]; in LaunchAdamWeightDecayNnacl() local
83 … int ret = AdamWeightDecayFp32(var, m, v, lr, beta1, beta2, epsilon, decay, gradient, start, end); in LaunchAdamWeightDecayNnacl()
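
Note: the CPU kernels above share one launch pattern: beta1 is read once as a scalar from the kernel's input address table, and the element range is split into [start, end) chunks that a lambda processes in parallel. A rough Python stand-in for that chunked launch (ThreadPoolExecutor is used purely for illustration; the real code uses the framework's parallel launcher):

    import numpy as np
    from concurrent.futures import ThreadPoolExecutor

    def launch_adam_chunks(var, m, v, grad, lr, beta1, beta2, eps, num_workers=4):
        def task(start, end):
            # Per-chunk body corresponding to the [start, end) lambdas in the kernels.
            s = slice(start, end)
            m[s] += (grad[s] - m[s]) * (1.0 - beta1)
            v[s] += (grad[s] * grad[s] - v[s]) * (1.0 - beta2)
            var[s] -= lr * m[s] / (np.sqrt(v[s]) + eps)

        bounds = np.linspace(0, var.size, num_workers + 1).astype(int)
        with ThreadPoolExecutor(num_workers) as pool:
            list(pool.map(lambda se: task(*se), zip(bounds[:-1], bounds[1:])))
        return var, m, v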
/third_party/mindspore/tests/st/ops/graph_kernel/
test_fused_adam.py
47 …def construct(self, beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tens… argument
53 next_m = self.op_mul(beta1, m_fp32) + \
91 …def construct(self, beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tens… argument
99 next_m = self.op_mul(beta1, m_fp32) + \
119 def CalFusedAdam(beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tensor, … argument
121 m_expect = beta1 * m + one_sub_beta_1 * gradient
132 beta1 = np.array([0.9]).astype(np.float32)
146 …_ = opt(Tensor(beta1), Tensor(beta2), Tensor(one_sub_beta_1), Tensor(one_sub_beta_2), Tensor(gradi…
149 beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tensor, lr,
161 beta1 = np.array([0.9]).astype(np.float32)
[all …]
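
Note: test_fused_adam.py checks the fused graph-kernel result against a NumPy reference (CalFusedAdam), feeding beta1 and its precomputed complement one_sub_beta_1 as separate one-element float32 tensors. The reference's first-moment line (snippet line 121) amounts to the following (hypothetical standalone version of the test's reference computation):

    import numpy as np

    beta1 = np.array([0.9], dtype=np.float32)
    one_sub_beta_1 = np.array([0.1], dtype=np.float32)
    m = np.zeros(4, dtype=np.float32)
    gradient = np.ones(4, dtype=np.float32)
    m_expect = beta1 * m + one_sub_beta_1 * gradient  # matches CalFusedAdam's m_expect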
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32_grad/
adam.cc
34 static int DoAdam(float *m, float *v, const float *gradient, float *weight, float beta1, float beta… in DoAdam() argument
46 const float one_minus_beta1 = 1.f - beta1; in DoAdam()
52 … weight[i] -= update_lr * (m[i] * beta1 + one_minus_beta1 * gradient[i]) / (std::sqrt(v[i]) + eps); in DoAdam()
72 auto beta1 = reinterpret_cast<float *>(in_tensors_.at(6)->MutableData())[0]; in Execute() local
87 return DoAdam(m, v, gradient, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate, in Execute()
143 auto beta1 = reinterpret_cast<float *>(in_tensors_.at(6)->MutableData())[0]; in OptimizerStep() local
155 … ret = DoAdam(m, v, grad_sum_, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate, in OptimizerStep()
/third_party/mindspore/tests/st/ops/gpu/
test_adam_fusion.py
46 def construct(self, beta1, beta2, gradient, eps, weight_decay_tensor, lr): argument
52 next_m = self.op_mul(beta1, m_fp32) + \
53 … self.op_mul(self.op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32)
72 beta1 = Tensor(np.array([0.9]).astype(np.float32))
80 _ = opt(beta1, beta2, gradient, eps, weight_decay_tensor, lr)
/third_party/mindspore/tests/ut/python/optimizer/
test_auto_grad.py
292 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
293 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
303 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
304 out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
305 …gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
306 …gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
315 beta1 = Tensor(np.array([0.9], dtype=np.float32))
321 grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
333 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
334 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
[all …]
/third_party/mindspore/tests/ut/python/ir/
test_row_tensor.py
161 def _update_run_op_for_map_row_tensor(beta1, beta2, eps, lr, weight_decay_tensor, param, argument
167 def _update_run_op_for_map_tensor(beta1, beta2, eps, lr, weight_decay_tensor, param, argument
181 …next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1,…
199 def _check_param_value(beta1, beta2, eps, weight_decay, prim_name): argument
201 validator.check_value_type("beta1", beta1, [float], prim_name)
205 validator.check_float_range(beta1, 0.0, 1.0, Rel.INC_NEITHER, "beta1", prim_name)
212 … def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0, argument
217 _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name)
218 self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
231 … updated_velocity = self.map(F.partial(adam_opt_for_map, self.beta1, self.beta2, self.eps, lr,
/third_party/mindspore/tests/st/auto_monad/
test_auto_monad_expression.py
34 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument
36 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
72 beta1 = Tensor(0.9, ms.float32)
76 out, new_var, new_m, new_v = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
79 …out_pyn, new_var_pyn, new_m_pyn, new_v_pyn = net(beta1_power, beta2_power, lr, beta1, beta2, epsil…
/third_party/boost/boost/numeric/odeint/stepper/detail/
pid_step_adjuster.hpp
41 const double beta1; member
55 :beta1(b1), beta2(b2), beta3(b3), alpha1(a1), alpha2(a2), in pid_op()
65 t1 = adapted_pow(abs(t2), -beta1/(m_steps + 1)) * in operator ()()
79 t1 = adapted_pow(abs(t2), -beta1/(m_steps + 1)); in operator ()()
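
Note: the Boost.Odeint hit is unrelated to Adam: here beta1 is a gain of a PID step-size controller, and the matched lines raise an error ratio to the power -beta1 / (m_steps + 1). A hedged sketch of that kind of controller factor (names hypothetical; the beta2/beta3 terms are inferred from the member list, only the beta1 term is visible in the matches):

    def pid_step_factor(err, err_prev, err_prev2, steps, beta1, beta2, beta3):
        # Each error term is raised to -beta/(steps + 1), mirroring
        # adapted_pow(abs(t2), -beta1/(m_steps + 1)) in the matched lines.
        k = steps + 1
        return (abs(err) ** (-beta1 / k)) \
            * (abs(err_prev) ** (-beta2 / k)) \
            * (abs(err_prev2) ** (-beta3 / k))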
