/third_party/mindspore/mindspore/nn/optim/ |
D | adam.py | 37 def _update_run_op(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flag, optim_fi… argument 73 next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) 74 - beta2, op_square(gradient_fp32)) 94 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable): argument 103 op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices)) 104 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, 109 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2, 118 success = F.depend(success, F.assign(v, op_mul(beta2, v))) 129 op_mul(F.tuple_to_array((1.0,)) - beta2, op_square(grad_value))) 155 beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param, argument [all …]
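The adam.py fragments above are pieces of the standard dense Adam update. A minimal NumPy sketch of what they compute, assuming the usual AdamWeightDecay semantics (all names here are illustrative, not the MindSpore API):

    import numpy as np

    def adam_update(param, m, v, grad, lr, beta1, beta2, eps, weight_decay, decay_flag):
        m = beta1 * m + (1.0 - beta1) * grad              # first-moment EMA
        v = beta2 * v + (1.0 - beta2) * grad * grad       # the next_v fragment above
        update = m / (np.sqrt(v) + eps)
        if decay_flag:
            update = update + weight_decay * param        # decoupled weight decay
        return param - lr * update, m, v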
|
D | lazyadam.py | 35 beta1, beta2, eps, lr, gradient, params, m, v, ps_parameter, cache_enable): 44 op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices)) 45 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, 50 … success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2, 62 next_v = v_slice * beta2 + values * values * (1 - beta2) 82 … beta1, beta2, eps, lr, gradient, params, moment1, moment2, ps_parameter, cache_enable): 87 … success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient), 90 …ccess = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, 95 def _check_param_value(beta1, beta2, eps, weight_decay, prim_name): argument 98 validator.check_value_type("beta2", beta2, [float], prim_name) [all …]
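The lazy variant touches only the rows named by the sparse gradient's indices; untouched rows keep stale moments. A hedged NumPy sketch of the next_v line above, assuming unique indices (duplicate indices would need pre-summing):

    import numpy as np

    def lazy_adam_rows(params, m, v, indices, values, lr, beta1, beta2, eps):
        m[indices] = beta1 * m[indices] + (1 - beta1) * values
        v[indices] = beta2 * v[indices] + (1 - beta2) * values * values
        params[indices] -= lr * m[indices] / (np.sqrt(v[indices]) + eps)
        return params, m, v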
|
D | lamb.py | 39 def _update_run_op(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, decay_f… argument 82 …next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta2, op_square(gradie… 87 op_pow(beta2, op_cast(global_step + num_one, mstype.float32))) 121 def _update_run_op_ascend(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, … argument 155 beta1, 1.0 - beta1, beta2, 1.0 - beta2, eps, 164 def _check_param_value(beta1, beta2, eps, prim_name): argument 166 validator.check_value_type("beta2", beta2, [float], prim_name) 169 validator.check_float_range(beta2, 0.0, 1.0, Rel.INC_NEITHER, "beta2", prim_name) 295 def __init__(self, params, learning_rate, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument 297 _check_param_value(beta1, beta2, eps, self.cls_name) [all …]
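lamb.py adds bias correction via op_pow(beta2, global_step + 1) and a layer-wise trust ratio. A sketch under the usual LAMB formulation; the fused Ascend path above differs in detail, so treat names and branches as illustrative:

    import numpy as np

    def lamb_update(param, m, v, grad, lr, beta1, beta2, eps, weight_decay, global_step):
        m = beta1 * m + (1 - beta1) * grad
        v = beta2 * v + (1 - beta2) * grad * grad
        m_hat = m / (1 - beta1 ** (global_step + 1))      # op_pow(beta1, t + 1)
        v_hat = v / (1 - beta2 ** (global_step + 1))      # op_pow(beta2, t + 1)
        update = m_hat / (np.sqrt(v_hat) + eps) + weight_decay * param
        w_norm, g_norm = np.linalg.norm(param), np.linalg.norm(update)
        trust = w_norm / g_norm if w_norm > 0 and g_norm > 0 else 1.0
        return param - lr * trust * update, m, v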
|
/third_party/mindspore/tests/st/fl/mobile/src/ |
D | adam.py | 33 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument 40 next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient) 42 next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient) 48 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument 87 next_v = op_mul(beta2, v_fp32) + op_select(cond, v_fp32,\ 88 … op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient_fp32))) 109 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable): argument 118 op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices)) 119 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, 124 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2, [all …]
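This FL test copy (the albert and hybrid_lenet entries below are near-identical) guards the update with an overflow flag via op_select. One plausible reading, sketched in NumPy with the guard simplified to "suppress the gradient contribution on overflow"; this is not a line-for-line port of the op_select arithmetic:

    import numpy as np

    def guarded_moment(v, grad, beta2, overflow):
        contrib = np.where(overflow, np.zeros_like(grad), (1 - beta2) * grad * grad)
        return beta2 * v + contrib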
|
/third_party/mindspore/tests/st/fl/albert/src/ |
D | adam.py | 34 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument 41 next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient) 43 next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient) 49 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument 88 next_v = op_mul(beta2, v_fp32) + op_select(cond, v_fp32,\ 89 … op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient_fp32))) 110 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable): argument 119 op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices)) 120 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, 125 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2, [all …]
|
/third_party/mindspore/tests/st/fl/hybrid_lenet/src/ |
D | adam.py | 33 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument 40 next_param = adam(param, m, v, lr, beta1, beta2, eps, weight_decay, gradient) 42 next_param = adam(param, m, v, lr, beta1, beta2, eps, 0.0, gradient) 48 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument 87 next_v = op_mul(beta2, v_fp32) + op_select(cond, v_fp32,\ 88 … op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient_fp32))) 109 … beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable): argument 118 op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices)) 119 success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, 124 … success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2, [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ |
D | adam_impl.cu | 31 … const T *learning_rate, const T *beta1, const T *beta2, const T *epsilon, T *variable, in ApplyAdamKernel() 38 v[i] += (gradient[i] * gradient[i] - v[i]) * (one - beta2[0]); in ApplyAdamKernel() 45 … const float *beta1, const float *beta2, const float *epsilon, const float *decay, in AdamWeightDecayKernel() argument 49 T next_v = beta2[0] * v[i] + (1 - beta2[0]) * gradient[i] * gradient[i]; in AdamWeightDecayKernel() 59 … const float *beta1, const float *beta2, const float *epsilon, const float *decay, in AdamWeightDecayKernel() argument 63 …half next_v = __float2half(beta2[0]) * v[i] + __float2half(1 - beta2[0]) * gradient[i] * gradient[… in AdamWeightDecayKernel() 73 …const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_strea… in ApplyAdam() 75 size, gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, variable, m, v); in ApplyAdam() 79 … const float *beta2, const float *epsilon, const float *decay, T *variable, T *m, T *v, in AdamWeightDecayOp() argument 81 …nel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, gradient, learning_rate, beta1, beta2, in AdamWeightDecayOp() [all …]
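The CUDA kernel uses the incremental form v += (g*g - v) * (1 - beta2), which is algebraically identical to the textbook EMA v = beta2*v + (1 - beta2)*g*g. A quick NumPy check of that identity:

    import numpy as np

    rng = np.random.default_rng(0)
    v = rng.random(8).astype(np.float32)
    g = rng.random(8).astype(np.float32)
    beta2 = np.float32(0.999)
    assert np.allclose(v + (g * g - v) * (1 - beta2),
                       beta2 * v + (1 - beta2) * g * g)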
|
D | adam_weight_decay_impl.cu | 22 … const float *one_sub_beta1, const float *beta2, const float *one_sub_beta2, in AdamWeightDecayKernel() argument 27 float next_v = beta2[0] * v[i] + one_sub_beta2[0] * gradient[i] * gradient[i]; in AdamWeightDecayKernel() 40 … const float *beta2, const float *one_sub_beta2, const float *epsilon, const float *lr, in AdamWeightDecay() argument 43 …element_num_, need_decay, beta1, one_sub_beta1, beta2, one_sub_beta2, epsilon, lr, weight_decay, m… in AdamWeightDecay() 48 … const float *one_sub_beta1, const float *beta2, const float *one_sub_beta2,
|
D | adam_impl.cuh | 23 …const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_strea… 26 … const float *beta2, const float *epsilon, const float *decay, T *variable, T *m, T *v,
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/ |
D | adam_fp32.h | 35 int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, con… 37 int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsil… 39 int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float e… 41 size_t FusedCastAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float … 43 size_t FusedCastAdamFp16(int16_t *var16, float *m, float *v, float lr, float beta1, float beta2, fl…
|
D | adam_fp32.c | 20 int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, con… in AdamFp32() argument 26 __m256 coeff2_r = _mm256_set1_ps(1 - beta2); in AdamFp32() 79 v[c1] += (gradient[c1] * gradient[c1] - v[c1]) * (1 - beta2); in AdamFp32() 89 int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsil… in AdamDeltaFp32() argument 95 __m256 coeff2_r = _mm256_set1_ps(1.0f - beta2); in AdamDeltaFp32() 97 __m256 beta2_r = _mm256_set1_ps(beta2); in AdamDeltaFp32() 144 v[c1] *= beta2; in AdamDeltaFp32() 145 v[c1] += (1 - beta2) * gradient[c1] * gradient[c1]; in AdamDeltaFp32() 155 int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float e… in AdamWeightDecayFp32() argument 159 const float beta2_minus = 1 - beta2; in AdamWeightDecayFp32() [all …]
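AdamFp32 broadcasts (1 - beta2) once with _mm256_set1_ps and processes eight floats per register, with a scalar tail for the remainder (the v[c1] lines). A NumPy sketch mirroring that body/tail split; illustrative only, since NumPy vectorizes either way:

    import numpy as np

    def adam_fp32_moment(v, grad, beta2):
        c8 = (v.size // 8) * 8   # eight lanes per __m256 register
        v[:c8] += (grad[:c8] * grad[:c8] - v[:c8]) * (1 - beta2)   # vector body
        v[c8:] += (grad[c8:] * grad[c8:] - v[c8:]) * (1 - beta2)   # scalar tail
        return v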
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/ |
D | adam_delta_cpu_kernel.cc | 35 void AdamDeltaCPUKernel::LaunchAdamDelta(T *delta, T *m, T *v, float lr, float beta1, float beta2, … in LaunchAdamDelta() argument 39 task = [this, delta, m, v, lr, beta1, beta2, epsilon, gradient](size_t start, size_t end) { in LaunchAdamDelta() 40 … (void)AdamDeltaFp32(delta, m, v, lr, beta1, beta2, epsilon, gradient, start, end, use_nesterov_); in LaunchAdamDelta() 43 task = [this, delta, m, v, lr, beta1, beta2, epsilon, gradient](size_t start, size_t end) { in LaunchAdamDelta() 47 v[c1] *= beta2; in LaunchAdamDelta() 48 v[c1] += (1 - beta2) * gradient[c1] * gradient[c1]; in LaunchAdamDelta() 123 auto beta2 = reinterpret_cast<float *>(inputs[6]->addr)[0]; in Launch() local 135 LaunchAdamDelta<float>(delta, m, v, lr, beta1, beta2, epsilon, grad, lens); in Launch()
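AdamDelta advances the moments but returns the delta to apply rather than the updated parameter, which suits parameter-server-style aggregation. A hedged sketch of that contract (the sign convention and Nesterov branch are assumptions, not the kernel's verified behavior):

    import numpy as np

    def adam_delta(m, v, grad, lr, beta1, beta2, eps, use_nesterov=False):
        m *= beta1
        m += (1 - beta1) * grad
        v *= beta2
        v += (1 - beta2) * grad * grad                     # the v[c1] lines above
        num = grad * (1 - beta1) + beta1 * m if use_nesterov else m
        return -lr * num / (np.sqrt(v) + eps)              # caller applies the delta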
|
D | fused_cast_adam_weight_decay_cpu_kernel.cc | 32 auto beta2 = reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]; in LaunchFusedCastAdamFp32() local 37 const auto beta2_minus = 1 - beta2; in LaunchFusedCastAdamFp32() 44 …size_t i = FusedCastAdamFp32(var, m, v, lr, beta1, beta2, epsilon, decay, reinterpret_cast<int16_t… in LaunchFusedCastAdamFp32() 66 auto beta2 = reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]; in LaunchFusedCastAdamFp16() local 71 const auto beta2_minus = 1 - beta2; in LaunchFusedCastAdamFp16() 78 …size_t i = FusedCastAdamFp16(reinterpret_cast<int16_t *>(var16), m, v, lr, beta1, beta2, epsilon, … in LaunchFusedCastAdamFp16()
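The fused-cast kernels keep the variable in 16-bit storage (an int16_t buffer reinterpreted as half) while doing the arithmetic in fp32, so precision is lost only at the final store. A hedged NumPy sketch of that boundary; the real nnacl signature differs:

    import numpy as np

    def fused_cast_adamw(var16_bits, m, v, grad, lr, beta1, beta2, eps, decay):
        var = var16_bits.view(np.float16).astype(np.float32)   # upcast once
        m[:] = beta1 * m + (1 - beta1) * grad
        v[:] = beta2 * v + (1 - beta2) * grad * grad
        var -= lr * (m / (np.sqrt(v) + eps) + decay * var)
        var16_bits[:] = var.astype(np.float16).view(np.int16)  # single downcast on store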
|
D | adam_cpu_kernel.cc | 40 T beta2 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]); in LaunchAdam() local 50 …auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t en… in LaunchAdam() 54 v[i] += (gradient[i] * gradient[i] - v[i]) * (one - beta2); in LaunchAdam() 75 float beta2 = reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]; in LaunchAdamNnacl() local 86 …auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t en… in LaunchAdamNnacl() 87 … int ret = AdamFp32(var, m, v, new_lr, beta1, beta2, epsilon, gradient, start, end, use_nesterov_); in LaunchAdamNnacl()
|
D | sparse_apply_adam_cpu_kernel.cc | 34 const auto beta2 = input_params->beta2_; in ComputeAdam() local 49 v[j] += (1 - beta2) * summed_grad * summed_grad; in ComputeAdam() 63 const auto beta2 = input_params->beta2_; in ComputeMomentum() local 66 v[i] *= beta2; in ComputeMomentum() 155 auto beta2 = reinterpret_cast<float *>(inputs[7]->addr)[0]; in LaunchKernel() local 183 input_params.beta2_ = beta2; in LaunchKernel()
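Note the two-phase structure: ComputeMomentum decays every row's moments, then ComputeAdam adds the (1 - beta2) * summed_grad^2 term only for rows present in the sparse gradient, with duplicate indices summed first. A hedged sketch (contrast with the lazy variant below, which never decays untouched rows):

    import numpy as np

    def sparse_adam_moments(m, v, indices, grads, beta1, beta2):
        m *= beta1                              # ComputeMomentum: all rows decay
        v *= beta2
        summed = {}
        for i, g in zip(indices, grads):        # pre-sum duplicate indices
            summed[i] = summed.get(i, 0.0) + g
        for i, g in summed.items():             # ComputeAdam: touched rows only
            m[i] += (1 - beta1) * g
            v[i] += (1 - beta2) * g * g
        return m, v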
|
D | sparse_apply_lazy_adam_cpu_kernel.cc | 35 const auto beta2 = input_params->beta2_; in ComputeLazyAdam() local 51 v[j] = beta2 * v[j] + (1 - beta2) * summed_grad * summed_grad; in ComputeLazyAdam() 135 auto beta2 = reinterpret_cast<float *>(inputs[7]->addr)[0]; in LaunchKernel() local 162 input_params.beta2_ = beta2; in LaunchKernel()
|
D | adam_weight_decay_cpu_kernel.cc | 43 T beta2 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]); in LaunchAdamWeightDecay() local 49 const T beta2_minus = one - beta2; in LaunchAdamWeightDecay() 74 auto beta2 = reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]; in LaunchAdamWeightDecayNnacl() local 83 … int ret = AdamWeightDecayFp32(var, m, v, lr, beta1, beta2, epsilon, decay, gradient, start, end); in LaunchAdamWeightDecayNnacl()
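The AdamWeightDecay kernels hoist one - beta2 out of the loop as beta2_minus and apply the decay term directly to the variable (decoupled, AdamW-style). A minimal sketch, assuming that reading:

    import numpy as np

    def adamw_step(var, m, v, grad, lr, beta1, beta2, eps, decay):
        beta1_minus, beta2_minus = 1 - beta1, 1 - beta2    # hoisted constants
        m[:] = beta1 * m + beta1_minus * grad
        v[:] = beta2 * v + beta2_minus * grad * grad
        var -= lr * (m / (np.sqrt(v) + eps) + decay * var) # decoupled decay
        return var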
|
/third_party/mindspore/tests/st/ops/graph_kernel/ |
D | test_fused_adam.py | 47 …def construct(self, beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tens… argument 56 next_v = self.op_mul(beta2, v_fp32) + self.op_mul(self.op_cast(one_sub_beta_2, 91 …def construct(self, beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tens… argument 102 next_v = self.op_mul(beta2, v_fp32) + self.op_mul(self.op_cast(one_sub_beta_2, 119 def CalFusedAdam(beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tensor, … argument 122 v_expect = beta2 * v + one_sub_beta_2 * gradient * gradient 133 beta2 = np.array([0.999]).astype(np.float32) 146 …_ = opt(Tensor(beta1), Tensor(beta2), Tensor(one_sub_beta_1), Tensor(one_sub_beta_2), Tensor(gradi… 149 beta1, beta2, one_sub_beta_1, one_sub_beta_2, gradient, eps, weight_decay_tensor, lr, 162 beta2 = np.array([0.999]).astype(np.float32) [all …]
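The test builds its oracle in plain NumPy (CalFusedAdam) and compares the fused kernel against it. A hedged sketch of that oracle pattern; the tolerance and harness names are illustrative:

    import numpy as np

    def expected_moments(m, v, grad, beta1, beta2, one_sub_beta_1, one_sub_beta_2):
        m_expect = beta1 * m + one_sub_beta_1 * grad
        v_expect = beta2 * v + one_sub_beta_2 * grad * grad  # the v_expect line above
        return m_expect, v_expect

    # usage: np.testing.assert_allclose(fused_v, v_expect, rtol=1e-6)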
|
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32_grad/ |
D | adam.cc | 34 …at *m, float *v, const float *gradient, float *weight, float beta1, float beta2, float beta1_power, in DoAdam() argument 47 const float one_minus_beta2 = 1.f - beta2; in DoAdam() 73 auto beta2 = reinterpret_cast<float *>(in_tensors_.at(7)->MutableData())[0]; in Execute() local 87 return DoAdam(m, v, gradient, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate, in Execute() 144 auto beta2 = reinterpret_cast<float *>(in_tensors_.at(7)->MutableData())[0]; in OptimizerStep() local 155 … ret = DoAdam(m, v, grad_sum_, weight, beta1, beta2, beta1_power, beta2_power, eps, learning_rate, in OptimizerStep()
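DoAdam takes precomputed beta1_power = beta1^t and beta2_power = beta2^t and folds them into a bias-corrected step size, as ApplyAdam-style kernels conventionally do. A hedged NumPy sketch of that convention:

    import numpy as np

    def apply_adam(var, m, v, beta1_power, beta2_power, lr, beta1, beta2, eps, grad):
        m[:] = beta1 * m + (1 - beta1) * grad
        v[:] = beta2 * v + (1 - beta2) * grad * grad
        lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)  # bias correction
        var -= lr_t * m / (np.sqrt(v) + eps)
        return var, m, v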
|
/third_party/mindspore/tests/st/ops/gpu/ |
D | test_adam_fusion.py | 46 def construct(self, beta1, beta2, gradient, eps, weight_decay_tensor, lr): argument 54 …next_v = self.op_mul(beta2, v_fp32) + self.op_mul(self.op_cast(F.tuple_to_array((1.0,)), mstype.fl… 55 beta2, self.op_square(gradient_fp32)) 73 beta2 = Tensor(np.array([0.999]).astype(np.float32)) 80 _ = opt(beta1, beta2, gradient, eps, weight_decay_tensor, lr)
|
/third_party/mindspore/tests/ut/python/optimizer/ |
D | test_auto_grad.py | 292 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument 293 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra… 303 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument 304 out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) 305 …gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self… 306 …gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self… 316 beta2 = Tensor(np.array([0.999], dtype=np.float32)) 321 grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) 333 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument 334 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra… [all …]
|
/third_party/mindspore/tests/ut/python/ir/ |
D | test_row_tensor.py | 161 def _update_run_op_for_map_row_tensor(beta1, beta2, eps, lr, weight_decay_tensor, param, argument 167 def _update_run_op_for_map_tensor(beta1, beta2, eps, lr, weight_decay_tensor, param, argument 183 next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) 184 - beta2, op_square(gradient_fp32)) 199 def _check_param_value(beta1, beta2, eps, weight_decay, prim_name): argument 202 validator.check_value_type("beta2", beta2, [float], prim_name) 206 validator.check_float_range(beta2, 0.0, 1.0, Rel.INC_NEITHER, "beta2", prim_name) 212 … def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0, argument 217 _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name) 219 self.beta2 = Tensor(np.array([beta2]).astype(np.float32)) [all …]
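_check_param_value (repeated in adam.py, lazyadam.py, and here) requires beta2 to be a float strictly inside (0, 1); Rel.INC_NEITHER excludes both endpoints. A hedged standalone sketch of that contract:

    def check_beta2(beta2, prim_name="Adam"):
        if not isinstance(beta2, float):
            raise TypeError(f"For '{prim_name}', beta2 must be a float, got {type(beta2)}")
        if not 0.0 < beta2 < 1.0:                # INC_NEITHER: open interval
            raise ValueError(f"For '{prim_name}', beta2 must be in (0, 1), got {beta2}")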
|
/third_party/mindspore/tests/st/auto_monad/ |
D | test_auto_monad_expression.py | 34 def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): argument 36 …self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, gra… 73 beta2 = Tensor(0.999, ms.float32) 76 out, new_var, new_m, new_v = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) 79 …out_pyn, new_var_pyn, new_m_pyn, new_v_pyn = net(beta1_power, beta2_power, lr, beta1, beta2, epsil…
|
/third_party/boost/boost/numeric/odeint/stepper/detail/ |
D | pid_step_adjuster.hpp | 42 const double beta2; member 55 :beta1(b1), beta2(b2), beta3(b3), alpha1(a1), alpha2(a2), in pid_op() 66 adapted_pow(abs(t3), -beta2/(m_steps + 1)) * in operator ()()
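Different beta2 here: in boost.odeint's PID step adjuster it is a controller exponent, not an EMA coefficient. The new step scales by error ratios raised to -beta1, -beta2, -beta3 over (m_steps + 1). A hedged Python sketch; the clamp bounds are illustrative and the error estimates are assumed nonzero:

    def pid_step(dt, err_n, err_n1, err_n2, beta1, beta2, beta3, m_steps):
        k = m_steps + 1
        fac = (abs(err_n) ** (-beta1 / k)
               * abs(err_n1) ** (-beta2 / k)     # the adapted_pow(..., -beta2/(m_steps + 1)) term
               * abs(err_n2) ** (-beta3 / k))
        return dt * min(5.0, max(0.2, fac))      # clamp growth/shrink per step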
|
/third_party/mindspore/tests/st/ops/ascend/test_aicpu_ops/ |
D | test_fused_sparse_lazy_adam.py | 29 beta2 = 0.999 variable 42 lr, beta1, beta2, epsilon, grad, indices)
|