/third_party/mindspore/tests/st/fl/mobile/src/
adam.py
     33  def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt…   argument
     40  next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient)
     42  next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient)
     48  def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag…   argument
     84  next_m = op_mul(beta1, m_fp32) + op_select(cond, m_fp32,\
     85  op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32))
    109  … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):
    117  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    119  success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    124  … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    [all …]

/third_party/mindspore/tests/st/fl/albert/src/
adam.py
     34  def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt…   argument
     41  next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient)
     43  next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient)
     49  def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag…   argument
     85  next_m = op_mul(beta1, m_fp32) + op_select(cond, m_fp32,\
     86  op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32))
    110  … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):
    118  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    120  success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    125  … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    [all …]

/third_party/mindspore/tests/st/fl/hybrid_lenet/src/
adam.py
     33  def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt…   argument
     40  next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient)
     42  next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient)
     48  def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag…   argument
     84  next_m = op_mul(beta1, m_fp32) + op_select(cond, m_fp32,\
     85  op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32))
    109  … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):
    117  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    119  success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    124  … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    [all …]

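The three federated-learning test copies above carry the same `_update_run_op` kernel. As a rough NumPy sketch of the moment update visible at lines 84-85 (the `op_select(cond, ...)` overflow gating and the parameter-server push/pull path are only partially visible here, so they are left out; the function and variable names below are illustrative, not taken from the source):

import numpy as np

def adam_moments(m, v, grad, beta1=0.9, beta2=0.999):
    # First-moment EMA as in "op_mul(beta1, m_fp32) + op_mul(1 - beta1, gradient_fp32)".
    next_m = beta1 * m + (1.0 - beta1) * grad
    # Second-moment EMA: the standard Adam companion update (assumed, not shown in the excerpt).
    next_v = beta2 * v + (1.0 - beta2) * grad * grad
    return next_m, next_v

m = np.zeros(3, dtype=np.float32)
v = np.zeros(3, dtype=np.float32)
grad = np.array([0.1, -0.2, 0.3], dtype=np.float32)
print(adam_moments(m, v, grad))
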
/third_party/mindspore/mindspore/nn/optim/
adam.py
     37  def _update_run_op(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flag, optim_fi…   argument
     70  next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32)
     71  - beta1, gradient_fp32)
     94  … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):
    102  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
    104  success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
    109  … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
    117  success = F.depend(success, F.assign(m, op_mul(beta1, m)))
    125  op_mul(F.tuple_to_array((1.0,)) - beta1, grad_value))
    133  F.assign(m, op_mul(beta1, next_m))
    [all …]

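Lines 70-71 of nn/optim/adam.py compute the same first-moment EMA inside `_update_run_op`. Below is a minimal NumPy sketch of the dense AdamWeightDecay-style step this fused kernel appears to implement; only `next_m` is visible in the excerpt, so the second-moment and decoupled weight-decay terms are assumed from the standard formula:

import numpy as np

def adam_weight_decay_step(param, m, v, grad, lr=1e-3, beta1=0.9, beta2=0.999,
                           eps=1e-8, weight_decay=0.0, decay_flag=True):
    next_m = beta1 * m + (1.0 - beta1) * grad             # cf. lines 70-71
    next_v = beta2 * v + (1.0 - beta2) * grad * grad       # assumed companion update
    update = next_m / (np.sqrt(next_v) + eps)
    if decay_flag:
        update = update + weight_decay * param             # decoupled (AdamW-style) decay
    return param - lr * update, next_m, next_v

p = np.ones(4, dtype=np.float32)
g = np.full(4, 0.5, dtype=np.float32)
print(adam_weight_decay_step(p, np.zeros(4), np.zeros(4), g, weight_decay=0.01)[0])
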
lazyadam.py
     35  beta1, beta2, eps, lr, gradient, params, m, v, ps_parameter, cache_enable):
     43  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
     45  success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
     50  … success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
     61  next_m = m_slice * beta1 + values * (1 - beta1)
     67  m_temp = beta1 * next_m + values * (1 - beta1)
     82  … beta1, beta2, eps, lr, gradient, params, moment1, moment2, ps_parameter, cache_enable):
     87  … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
     90  …ccess = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
     95  def _check_param_value(beta1, beta2, eps, weight_decay, prim_name):   argument
    [all …]

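Lines 61 and 67 of lazyadam.py show the defining trait of lazy Adam: only the rows addressed by the sparse gradient are touched. A hedged sketch of that row-wise update follows; the bias correction via `beta1_power`/`beta2_power` that the push/pull path carries is omitted, and the function name is illustrative:

import numpy as np

def lazy_adam_rows(param, m, v, indices, values, lr=1e-3,
                   beta1=0.9, beta2=0.999, eps=1e-8):
    # Update only the rows that actually received gradient values.
    for idx, g in zip(indices, values):
        m[idx] = beta1 * m[idx] + (1.0 - beta1) * g          # cf. line 61
        v[idx] = beta2 * v[idx] + (1.0 - beta2) * g * g
        param[idx] -= lr * m[idx] / (np.sqrt(v[idx]) + eps)
    return param, m, v

param = np.ones((5, 2), dtype=np.float32)
m = np.zeros_like(param); v = np.zeros_like(param)
lazy_adam_rows(param, m, v, indices=[0, 3], values=np.full((2, 2), 0.1, np.float32))
print(param[0], param[1])   # row 1 stays untouched
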
lamb.py
     39  def _update_run_op(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, decay_f…   argument
     80  … next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta1, gradient_fp32)
     85  - op_pow(beta1, op_cast(global_step + num_one, mstype.float32)))
    121  def _update_run_op_ascend(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, …   argument
    155  beta1, 1.0 - beta1, beta2, 1.0 - beta2, eps,
    164  def _check_param_value(beta1, beta2, eps, prim_name):   argument
    165  validator.check_value_type("beta1", beta1, [float], prim_name)
    168  validator.check_float_range(beta1, 0.0, 1.0, Rel.INC_NEITHER, "beta1", prim_name)
    295  def __init__(self, params, learning_rate, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0):   argument
    297  _check_param_value(beta1, beta2, eps, self.cls_name)
    [all …]

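In lamb.py, line 80 forms the usual first moment and line 85 divides by 1 - beta1^(step+1) for bias correction; LAMB then rescales the step by a layer-wise trust ratio. A rough NumPy sketch under those assumptions (the exact trust-ratio clipping used in this file is not visible in the excerpt):

import numpy as np

def lamb_step(param, m, v, grad, global_step, lr=1e-3, beta1=0.9, beta2=0.999,
              eps=1e-6, weight_decay=0.01):
    next_m = beta1 * m + (1.0 - beta1) * grad                   # cf. line 80
    next_v = beta2 * v + (1.0 - beta2) * grad * grad
    m_hat = next_m / (1.0 - beta1 ** (global_step + 1))         # cf. line 85
    v_hat = next_v / (1.0 - beta2 ** (global_step + 1))
    update = m_hat / (np.sqrt(v_hat) + eps) + weight_decay * param
    w_norm, u_norm = np.linalg.norm(param), np.linalg.norm(update)
    trust_ratio = w_norm / u_norm if w_norm > 0 and u_norm > 0 else 1.0
    return param - lr * trust_ratio * update, next_m, next_v
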
adafactor.py
     68  def _run_opt_with_one_number(eps, clip_threshold, decay_rate, beta1,   argument
    140  exp_avg_update = P.Add()(P.Mul()(exp_avg_update, beta1), update * (1 - beta1))
    294  beta1=0.9,   argument
    310  if beta1 is None:
    311  beta1 = 0.0
    333  validator.check_value_type("beta1", beta1, [int, float], self.cls_name)
    334  validator.check_non_negative_float(float(beta1), "beta1", self.cls_name)
    338  self.beta1 = trans_to_tensor(beta1)
    348  self.init_ada_factor_state(beta1)
    352  def init_ada_factor_state(self, beta1):   argument
    [all …]

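In adafactor.py, beta1 is optional: a value of None is mapped to 0.0 (lines 310-311), and the first-moment EMA at line 140 only matters when beta1 > 0. A small sketch of that behaviour; the factored second-moment statistics that make Adafactor memory-efficient are not shown here:

def adafactor_first_moment(exp_avg, update, beta1):
    # beta1=None disables momentum, matching the "if beta1 is None: beta1 = 0.0" default.
    beta1 = 0.0 if beta1 is None else beta1
    if beta1 == 0.0:
        return update                                   # no EMA kept, use the update directly
    return beta1 * exp_avg + (1.0 - beta1) * update     # cf. line 140

print(adafactor_first_moment(0.5, 1.0, None))   # -> 1.0
print(adafactor_first_moment(0.5, 1.0, 0.9))    # -> 0.55
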
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
adam_impl.cu
     31  … const T *learning_rate, const T *beta1, const T *beta2, const T *epsilon, T *variable,   in ApplyAdamKernel()
     37  m[i] += (gradient[i] - m[i]) * (one - beta1[0]);   in ApplyAdamKernel()
     45  … const float *beta1, const float *beta2, const float *epsilon, const float *decay,   in AdamWeightDecayKernel()   argument
     48  T next_m = beta1[0] * m[i] + (1 - beta1[0]) * gradient[i];   in AdamWeightDecayKernel()
     59  … const float *beta1, const float *beta2, const float *epsilon, const float *decay,   in AdamWeightDecayKernel()   argument
     62  half next_m = __float2half(beta1[0]) * m[i] + __float2half(1 - beta1[0]) * gradient[i];   in AdamWeightDecayKernel()
     73  …const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_strea…   in ApplyAdam()
     75  size, gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, variable, m, v);   in ApplyAdam()
     78  …WeightDecayOp(const size_t size, const T *gradient, const float *learning_rate, const float *beta1,   in AdamWeightDecayOp()   argument
     81  …nel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, gradient, learning_rate, beta1, beta2,   in AdamWeightDecayOp()
    [all …]

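Line 37 of adam_impl.cu writes the first moment as m += (g - m) * (1 - beta1), which is algebraically the same EMA as beta1*m + (1-beta1)*g. A host-side NumPy reference for what ApplyAdamKernel presumably computes per element; only the m update is visible in the excerpt, so the bias-corrected learning rate built from `beta1_power`/`beta2_power` is an assumption:

import numpy as np

def apply_adam_reference(var, m, v, grad, lr, beta1, beta2, eps,
                         beta1_power, beta2_power):
    new_lr = lr * np.sqrt(1.0 - beta2_power) / (1.0 - beta1_power)   # assumed bias correction
    m += (grad - m) * (1.0 - beta1)            # line 37: same as beta1*m + (1-beta1)*grad
    v += (grad * grad - v) * (1.0 - beta2)
    var -= new_lr * m / (np.sqrt(v) + eps)
    return var, m, v
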
adam_weight_decay_impl.cu
     21  …bal__ void AdamWeightDecayKernel(const int element_num_, const bool need_decay, const float *beta1,   in AdamWeightDecayKernel()   argument
     26  float next_m = beta1[0] * m[i] + one_sub_beta1[0] * gradient[i];   in AdamWeightDecayKernel()
     39  void AdamWeightDecay(const int &element_num_, const bool &need_decay, const float *beta1, const flo…   in AdamWeightDecay()   argument
     43  …element_num_, need_decay, beta1, one_sub_beta1, beta2, one_sub_beta2, epsilon, lr, weight_decay, m…   in AdamWeightDecay()
     47  template void AdamWeightDecay(const int &element_num_, const bool &need_decay, const float *beta1,

adam_impl.cuh
     23  …const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_strea…
     25  …WeightDecayOp(const size_t size, const T *gradient, const float *learning_rate, const float *beta1,

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/
adam_fp32.c
     20  int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, con…   in AdamFp32()   argument
     25  __m256 coeff1_r = _mm256_set1_ps(1 - beta1);   in AdamFp32()
     27  __m256 beta1_r = _mm256_set1_ps(beta1);   in AdamFp32()
     78  m[c1] += (gradient[c1] - m[c1]) * (1 - beta1);   in AdamFp32()
     81  var[c1] -= lr * (m[c1] * beta1 + (1 - beta1) * gradient[c1]) / (sqrt(v[c1]) + epsilon);   in AdamFp32()
     89  int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsil…   in AdamDeltaFp32()   argument
     94  __m256 coeff1_r = _mm256_set1_ps(1.0f - beta1);   in AdamDeltaFp32()
     96  __m256 beta1_r = _mm256_set1_ps(beta1);   in AdamDeltaFp32()
    142  m[c1] *= beta1;   in AdamDeltaFp32()
    143  m[c1] += (1 - beta1) * gradient[c1];   in AdamDeltaFp32()
    [all …]

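adam_fp32.c vectorizes the update with AVX256 intrinsics (lines 25-27) and finishes the remaining elements in a scalar tail (lines 78-81). A NumPy rendering of that scalar tail follows; treating line 81 as the Nesterov-style branch is an assumption borrowed from the CPU kernels below, which pass `use_nesterov_`:

import numpy as np

def adam_fp32_tail(var, m, v, grad, lr, beta1, beta2, eps, use_nesterov, start, end):
    for i in range(start, end):
        m[i] += (grad[i] - m[i]) * (1.0 - beta1)               # cf. line 78
        v[i] += (grad[i] * grad[i] - v[i]) * (1.0 - beta2)
        if use_nesterov:                                       # assumed branch
            step = m[i] * beta1 + (1.0 - beta1) * grad[i]      # cf. line 81
        else:
            step = m[i]
        var[i] -= lr * step / (np.sqrt(v[i]) + eps)
    return var, m, v
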
adam_fp32.h
     35  int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, con…
     37  int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsil…
     39  int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float e…
     41  size_t FusedCastAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float …
     43  size_t FusedCastAdamFp16(int16_t *var16, float *m, float *v, float lr, float beta1, float beta2, fl…

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/
adam_delta_cpu_kernel.cc
     35  void AdamDeltaCPUKernel::LaunchAdamDelta(T *delta, T *m, T *v, float lr, float beta1, float beta2, …   in LaunchAdamDelta()   argument
     39  task = [this, delta, m, v, lr, beta1, beta2, epsilon, gradient](size_t start, size_t end) {   in LaunchAdamDelta()
     40  … (void)AdamDeltaFp32(delta, m, v, lr, beta1, beta2, epsilon, gradient, start, end, use_nesterov_);   in LaunchAdamDelta()
     43  task = [this, delta, m, v, lr, beta1, beta2, epsilon, gradient](size_t start, size_t end) {   in LaunchAdamDelta()
     45  m[c1] *= beta1;   in LaunchAdamDelta()
     46  m[c1] += (1 - beta1) * gradient[c1];   in LaunchAdamDelta()
     50  … delta[c1] = -lr * (m[c1] * beta1 + (1 - beta1) * gradient[c1]) / (std::sqrt(v[c1]) + epsilon);   in LaunchAdamDelta()
    122  auto beta1 = reinterpret_cast<float *>(inputs[5]->addr)[0];   in Launch()   local
    135  LaunchAdamDelta<float>(delta, m, v, lr, beta1, beta2, epsilon, grad, lens);   in Launch()

adam_cpu_kernel.cc
     39  T beta1 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]);   in LaunchAdam()   local
     50  …auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t en…   in LaunchAdam()
     53  m[i] += (gradient[i] - m[i]) * (one - beta1);   in LaunchAdam()
     57  var[i] -= new_lr * (m[i] * beta1 + (one - beta1) * gradient[i]) / (sqrt_v + epsilon);   in LaunchAdam()
     74  float beta1 = reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex];   in LaunchAdamNnacl()   local
     86  …auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t en…   in LaunchAdamNnacl()
     87  … int ret = AdamFp32(var, m, v, new_lr, beta1, beta2, epsilon, gradient, start, end, use_nesterov_);   in LaunchAdamNnacl()

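Both launch paths in adam_cpu_kernel.cc capture a precomputed `new_lr` rather than the raw learning rate. A one-line sketch of what that scaling presumably is: the standard Adam bias correction. The actual computation happens above the excerpted lines, so this is an assumption:

import numpy as np

def bias_corrected_lr(lr, beta1_power, beta2_power):
    # lr * sqrt(1 - beta2^t) / (1 - beta1^t), with the powers supplied as kernel inputs
    return lr * np.sqrt(1.0 - beta2_power) / (1.0 - beta1_power)

print(bias_corrected_lr(1e-3, 0.9 ** 10, 0.999 ** 10))
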
sparse_apply_adam_cpu_kernel.cc
     33  const auto beta1 = input_params->beta1_;   in ComputeAdam()   local
     48  m[j] += (1 - beta1) * summed_grad;   in ComputeAdam()
     51  m_t[j] = m[j] * beta1 + (1 - beta1) * summed_grad;   in ComputeAdam()
     62  const auto beta1 = input_params->beta1_;   in ComputeMomentum()   local
     65  m[i] *= beta1;   in ComputeMomentum()
    154  auto beta1 = reinterpret_cast<float *>(inputs[6]->addr)[0];   in LaunchKernel()   local
    182  input_params.beta1_ = beta1;   in LaunchKernel()

sparse_apply_lazy_adam_cpu_kernel.cc
     34  const auto beta1 = input_params->beta1_;   in ComputeLazyAdam()   local
     50  m[j] = beta1 * m[j] + (1 - beta1) * summed_grad;   in ComputeLazyAdam()
     53  var[j] -= lr * (m[j] * beta1 + (1 - beta1) * summed_grad) / (std::sqrt(v[j]) + epsilon);   in ComputeLazyAdam()
    134  auto beta1 = reinterpret_cast<float *>(inputs[6]->addr)[0];   in LaunchKernel()   local
    161  input_params.beta1_ = beta1;   in LaunchKernel()

fused_cast_adam_weight_decay_cpu_kernel.cc
     31  auto beta1 = reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex];   in LaunchFusedCastAdamFp32()   local
     36  const auto beta1_minus = 1 - beta1;   in LaunchFusedCastAdamFp32()
     44  …size_t i = FusedCastAdamFp32(var, m, v, lr, beta1, beta2, epsilon, decay, reinterpret_cast<int16_t…   in LaunchFusedCastAdamFp32()
     65  auto beta1 = reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex];   in LaunchFusedCastAdamFp16()   local
     70  const auto beta1_minus = 1 - beta1;   in LaunchFusedCastAdamFp16()
     78  …size_t i = FusedCastAdamFp16(reinterpret_cast<int16_t *>(var16), m, v, lr, beta1, beta2, epsilon, …   in LaunchFusedCastAdamFp16()

adam_weight_decay_cpu_kernel.cc
     42  T beta1 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]);   in LaunchAdamWeightDecay()   local
     48  const T beta1_minus = one - beta1;   in LaunchAdamWeightDecay()
     73  auto beta1 = reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex];   in LaunchAdamWeightDecayNnacl()   local
     83  … int ret = AdamWeightDecayFp32(var, m, v, lr, beta1, beta2, epsilon, decay, gradient, start, end);   in LaunchAdamWeightDecayNnacl()

/third_party/mindspore/tests/st/ops/graph_kernel/
test_fused_adam.py
     47  …def construct(self, beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tens…   argument
     53  next_m = self.op_mul(beta1, m_fp32) + \
     91  …def construct(self, beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tens…   argument
     99  next_m = self.op_mul(beta1, m_fp32) + \
    119  def CalFusedAdam(beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tensor, …   argument
    121  m_expect = beta1 * m + one_sub_beta_1 * gradient
    132  beta1 = np.array([0.9]).astype(np.float32)
    146  …_ = opt(Tensor(beta1), Tensor(beta2), Tensor(one_sub_beta_1), Tensor(one_sub_beta_2), Tensor(gradi…
    149  beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tensor, lr,
    161  beta1 = np.array([0.9]).astype(np.float32)
    [all …]

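test_fused_adam.py checks the fused graph-kernel result against a plain NumPy expectation; line 121 shows the first line of `CalFusedAdam`. A sketch of such a reference computation follows, with everything past `m_expect` filled in from the standard fused AdamWeightDecay formula rather than from the (truncated) test source:

import numpy as np

def cal_fused_adam_expect(param, m, v, grad, beta1, one_sub_beta_1,
                          beta2, one_sub_beta_2, lr, eps, weight_decay):
    m_expect = beta1 * m + one_sub_beta_1 * grad           # cf. line 121
    v_expect = beta2 * v + one_sub_beta_2 * grad * grad    # assumed
    update = m_expect / (np.sqrt(v_expect) + eps) + weight_decay * param
    return param - lr * update, m_expect, v_expect

beta1 = np.array([0.9], dtype=np.float32)                  # matches line 132
param = np.ones(3, dtype=np.float32)
out = cal_fused_adam_expect(param, np.zeros(3), np.zeros(3),
                            np.full(3, 0.5, np.float32),
                            beta1, 1 - beta1, 0.999, 0.001, 1e-3, 1e-8, 0.01)
print(out[0])
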
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32_grad/
adam.cc
     34  static int DoAdam(float *m, float *v, const float *gradient, float *weight, float beta1, float beta…   in DoAdam()   argument
     46  const float one_minus_beta1 = 1.f - beta1;   in DoAdam()
     52  … weight[i] -= update_lr * (m[i] * beta1 + one_minus_beta1 * gradient[i]) / (std::sqrt(v[i]) + eps);   in DoAdam()
     72  auto beta1 = reinterpret_cast<float *>(in_tensors_.at(6)->MutableData())[0];   in Execute()   local
     87  return DoAdam(m, v, gradient, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate,   in Execute()
    143  auto beta1 = reinterpret_cast<float *>(in_tensors_.at(6)->MutableData())[0];   in OptimizerStep()   local
    155  … ret = DoAdam(m, v, grad_sum_, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate,   in OptimizerStep()

/third_party/mindspore/tests/st/ops/gpu/
test_adam_fusion.py
     46  def construct(self, beta1, beta2, gradient, eps, weight_decay_tensor, lr):   argument
     52  next_m = self.op_mul(beta1, m_fp32) + \
     53  … self.op_mul(self.op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32)
     72  beta1 = Tensor(np.array([0.9]).astype(np.float32))
     80  _ = opt(beta1, beta2, gradient, eps, weight_decay_tensor, lr)

/third_party/mindspore/tests/ut/python/optimizer/
test_auto_grad.py
    292  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):   argument
    293  …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
    303  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):   argument
    304  out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
    305  …gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
    306  …gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self…
    315  beta1 = Tensor(np.array([0.9], dtype=np.float32))
    321  grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
    333  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):   argument
    334  …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
    [all …]

/third_party/mindspore/tests/ut/python/ir/
test_row_tensor.py
    161  def _update_run_op_for_map_row_tensor(beta1, beta2, eps, lr, weight_decay_tensor, param,   argument
    167  def _update_run_op_for_map_tensor(beta1, beta2, eps, lr, weight_decay_tensor, param,   argument
    181  …next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1,…
    199  def _check_param_value(beta1, beta2, eps, weight_decay, prim_name):   argument
    201  validator.check_value_type("beta1", beta1, [float], prim_name)
    205  validator.check_float_range(beta1, 0.0, 1.0, Rel.INC_NEITHER, "beta1", prim_name)
    212  … def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0,   argument
    217  _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name)
    218  self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
    231  … updated_velocity = self.map(F.partial(adam_opt_for_map, self.beta1, self.beta2, self.eps, lr,

/third_party/mindspore/tests/st/auto_monad/
test_auto_monad_expression.py
     34  def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):   argument
     36  …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra…
     72  beta1 = Tensor(0.9, ms.float32)
     76  out, new_var, new_m, new_v = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
     79  …out_pyn, new_var_pyn, new_m_pyn, new_v_pyn = net(beta1_power, beta2_power, lr, beta1, beta2, epsil…

/third_party/boost/boost/numeric/odeint/stepper/detail/
pid_step_adjuster.hpp
     41  const double beta1;   member
     55  :beta1(b1), beta2(b2), beta3(b3), alpha1(a1), alpha2(a2),   in pid_op()
     65  t1 = adapted_pow(abs(t2), -beta1/(m_steps + 1)) *   in operator ()()
     79  t1 = adapted_pow(abs(t2), -beta1/(m_steps + 1));   in operator ()()

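Unlike the optimizer files above, here beta1 is a gain of odeint's PID step-size controller: each beta is an exponent applied to one of the recent (tolerance-normalized) error estimates, as lines 65 and 79 suggest. A hedged Python sketch of that scaling factor, ignoring the alpha terms on step-size ratios that also appear in the header; `steps` stands in for `m_steps` and the normalization by tolerance is assumed:

def pid_step_factor(err, err_old, err_old2, beta1, beta2, beta3, steps):
    # Each error is assumed to already be normalized by the tolerance.
    k = steps + 1
    return (abs(err) ** (-beta1 / k) *
            abs(err_old) ** (-beta2 / k) *
            abs(err_old2) ** (-beta3 / k))

# With beta2 = beta3 = 0 this collapses to the elementary single-error controller.
print(pid_step_factor(0.5, 1.0, 1.0, beta1=0.7, beta2=0.0, beta3=0.0, steps=4))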