/third_party/mindspore/mindspore/nn/optim/

D | optimizer.py |
  129  def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):  (argument)
  152  learning_rate = self._preprocess_single_lr(learning_rate)
  159  … self._init_group_params(parameters, learning_rate, weight_decay, self.grad_centralization)
  167  self.learning_rate = CellList(self.group_lr, auto_prefix=False) if self.dynamic_lr \
  170  self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')
  368  def _preprocess_single_lr(self, learning_rate):  (argument)
  370  if isinstance(learning_rate, (float, int)):
  371  learning_rate = float(learning_rate)
  372  validator.check_non_negative_float(learning_rate, "learning rate", self.cls_name)
  373  return learning_rate
  [all …]

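The hits above are the Optimizer base class: it accepts either one learning rate for all parameters or a separate rate per parameter group, and `_preprocess_single_lr` validates scalar rates. A minimal plain-Python sketch of that dispatch (not the MindSpore implementation; the helper names below are illustrative only):

```python
# Sketch only: mirrors the scalar-vs-group learning-rate handling in optimizer.py.
def preprocess_single_lr(learning_rate):
    """Plain-number version of _preprocess_single_lr; Tensors and schedule cells
    are handled separately in the real class."""
    if isinstance(learning_rate, (float, int)):
        learning_rate = float(learning_rate)
        if learning_rate < 0.0:
            raise ValueError("learning rate must be non-negative, got %f" % learning_rate)
    return learning_rate

def build_group_lrs(groups, default_lr):
    """One learning rate per parameter group, falling back to the default."""
    return [preprocess_single_lr(g.get("lr", default_lr)) for g in groups]

groups = [{"params": ["conv.weight"], "lr": 0.01},
          {"params": ["dense.weight"]}]          # no 'lr' key: uses the default
print(build_group_lrs(groups, default_lr=0.1))   # [0.01, 0.1]
```
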
D | adafactor.py |
  30   def _get_lr(step, rms, learning_rate, relative_step, warmup_init, scale_parameter, eps):  (argument)
  32   rel_step_sz = learning_rate
  71   learning_rate, step, grad, param,  (argument)
  105  …learning_rate_update = _get_lr(step, rms, learning_rate, relative_step, warmup_init, scale_paramet…
  106  …learning_rate_update = F.assign(learning_rate, F.cast(learning_rate_update, F.dtype(learning_rate)…
  108  learning_rate_update = learning_rate * 1.0
  290  learning_rate=None,  (argument)
  302  if learning_rate is not None and relative_step:
  303  raise ValueError("Cannot combine manual lr and relative_step options", learning_rate)
  306  if learning_rate is None and not relative_step:
  [all …]

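Adafactor's `_get_lr` picks between the user-supplied rate and a relative step size derived from the step count and the parameter RMS. A hedged NumPy sketch of that logic; the 1e-2 / 1e-6 constants and the `eps2` floor follow the Adafactor paper and common implementations, not lines read from this file:

```python
import numpy as np

def get_lr(step, param_rms, learning_rate, relative_step, warmup_init,
           scale_parameter, eps2=1e-3):
    if relative_step:
        # Relative step size: slow warmup if requested, then ~1/sqrt(step).
        min_step = 1e-6 * step if warmup_init else 1e-2
        rel_step_sz = min(min_step, 1.0 / np.sqrt(step))
    else:
        rel_step_sz = learning_rate
    # Optionally scale the step by the RMS of the parameter being updated.
    param_scale = max(eps2, param_rms) if scale_parameter else 1.0
    return param_scale * rel_step_sz

print(get_lr(step=100, param_rms=0.5, learning_rate=None,
             relative_step=True, warmup_init=False, scale_parameter=True))  # 0.005
```
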
D | ftrl.py |
  29   def _tensor_run_opt_with_sparse(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate, linear,  (argument)
  46   def _tensor_run_opt(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate, linear,  (argument)
  52   success = F.depend(success, pull(push((gradient, learning_rate, l1, l2, lr_power),
  55   … success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power))
  197  … def __init__(self, params, initial_accum=0.1, learning_rate=0.001, lr_power=-0.5, l1=0.0, l2=0.0,  (argument)
  199  super(FTRL, self).__init__(learning_rate, params, weight_decay, loss_scale=loss_scale)
  207  self.lr = learning_rate
  213  … self.sparse_opt = P.SparseApplyFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking)
  217  self._ps_push.add_prim_attr("lr", learning_rate)

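The FTRL cell forwards `learning_rate`, `l1`, `l2` and `lr_power` to the ApplyFtrl / SparseApplyFtrl primitives. A hedged NumPy sketch of the dense update follows; the `sigma` and denominator expressions match what is visible in ftrl_impl.cu further down, while the accumulator update is stated as the conventional FTRL-proximal rule:

```python
import numpy as np

def ftrl_step(weight, accum, linear, grad, learning_rate, l1, l2, lr_power):
    new_accum = accum + grad ** 2
    # "accumulation power" terms, i.e. accum ** (-lr_power), as in ftrl_impl.cu.
    sigma = (new_accum ** (-lr_power) - accum ** (-lr_power)) / learning_rate
    linear = linear + grad - sigma * weight
    quadratic = new_accum ** (-lr_power) / learning_rate + 2.0 * l2
    # Soft-threshold by l1: coordinates with |linear| <= l1 are set to zero.
    weight = np.where(np.abs(linear) > l1,
                      (np.sign(linear) * l1 - linear) / quadratic,
                      0.0)
    return weight, new_accum, linear
```
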
D | lars.py |
  28   def _tensor_run_opt(lars, learning_rate, weight_decay, gradient, weight, decay_flag, lars_flag):  (argument)
  34   … grad_t = lars(weight, gradient, w_square_sum, grad_square_sum, weight_decay, learning_rate)
  37   grad_t = lars(weight, gradient, w_square_sum, grad_square_sum, num_zero, learning_rate)
  114  self.learning_rate = Parameter(Tensor(0.0, dtype=mstype.float32), name="fake_lr")
  124  self.origin_learning_rate = optimizer.learning_rate
  147  for learning_rate in self.origin_learning_rate:
  148  current_dynamic_lr = learning_rate(self.global_step)
  160  lr = self.learning_rate

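LARS rescales each layer's gradient by a trust ratio computed from the norms of the weight and the gradient (the `w_square_sum` / `grad_square_sum` reductions above) before handing it to the wrapped optimizer. A hedged NumPy sketch; the coefficient `eta` and the `eps` guard are assumptions, not values read from lars.py:

```python
import numpy as np

def lars_scaled_grad(weight, gradient, weight_decay, learning_rate, eta=0.001, eps=1e-5):
    w_norm = np.sqrt(np.sum(weight ** 2))      # corresponds to w_square_sum
    g_norm = np.sqrt(np.sum(gradient ** 2))    # corresponds to grad_square_sum
    trust_ratio = 1.0
    if w_norm > 0.0 and g_norm > 0.0:
        trust_ratio = eta * w_norm / (g_norm + weight_decay * w_norm + eps)
    # The rescaled gradient is what the wrapped optimizer then consumes.
    return learning_rate * trust_ratio * (gradient + weight_decay * weight)

w = np.ones(10, np.float32)
g = np.full(10, 0.1, np.float32)
print(lars_scaled_grad(w, g, weight_decay=1e-4, learning_rate=0.1)[:3])
```
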
D | momentum.py |
  28   def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, moment, ps_parameter, cache…  (argument)
  34   shapes = (op_shape(learning_rate), op_shape(gradient), op_shape(momentum))
  35   … success = F.depend(True, _ps_pull(_ps_push((learning_rate, gradient, momentum), shapes), weight))
  37   success = F.depend(True, opt(weight, moment, learning_rate, gradient, momentum))
  151  … def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0, use_nesterov…  (argument)
  152  super(Momentum, self).__init__(learning_rate, params, weight_decay, loss_scale)

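The Momentum cell delegates the actual update to the ApplyMomentum primitive. A NumPy sketch of one step is below; the two variable updates mirror the branches visible in momentum_impl.cu further down, while the accumulator update line is the conventional ApplyMomentum rule and is an assumption here:

```python
import numpy as np

def momentum_step(variable, accumulation, gradient, learning_rate, momentum,
                  use_nesterov=False):
    # Conventional ApplyMomentum accumulator update (not among the hits above).
    accumulation = accumulation * momentum + gradient
    if use_nesterov:
        variable = variable - (gradient * learning_rate
                               + accumulation * momentum * learning_rate)
    else:
        variable = variable - learning_rate * accumulation
    return variable, accumulation

v = np.zeros(4, np.float32)
a = np.zeros(4, np.float32)
g = np.ones(4, np.float32)
v, a = momentum_step(v, a, g, learning_rate=0.1, momentum=0.9)
print(v, a)   # [-0.1 ...] [1. ...]
```
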
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/

D | momentum_impl.cu |
  19  …mentumUpdateVariableKernel(const size_t size, T *variable, T *accumulation, const S *learning_rate,  (in MomentumUpdateVariableKernel(), argument)
  24  … variable[i] -= gradient[i] * learning_rate[0] + accumulation[i] * momentum[0] * learning_rate[0];  (in MomentumUpdateVariableKernel())
  29  variable[i] -= learning_rate[0] * accumulation[i];  (in MomentumUpdateVariableKernel())
  35  … const float *learning_rate, const half *gradient, const float *momentum,  (in MomentumUpdateVariableKernel(), argument)
  40  variable[i] -= gradient[i] * __float2half(learning_rate[0]) +  (in MomentumUpdateVariableKernel())
  41  accumulation[i] * __float2half(momentum[0]) * __float2half(learning_rate[0]);  (in MomentumUpdateVariableKernel())
  46  variable[i] -= __float2half(learning_rate[0]) * accumulation[i];  (in MomentumUpdateVariableKernel())
  52  … const float *learning_rate, const half *gradient, const float *momentum,  (in MomentumUpdateVariableKernel(), argument)
  57  …variable[i] -= __half2float(gradient[i]) * learning_rate[0] + accumulation[i] * momentum[0] * lear…  (in MomentumUpdateVariableKernel())
  62  variable[i] -= learning_rate[0] * accumulation[i];  (in MomentumUpdateVariableKernel())
  [all …]

D | adagrad_impl.cu |
  32   const S *learning_rate,  (in ApplyAdagradKernel(), argument)
  40   variable[i] -= learning_rate[0] * gradient[i] / SqrtFunc(accumulation[i]);  (in ApplyAdagradKernel())
  47   const float *learning_rate,  (in ApplyAdagradKernel(), argument)
  55   variable[i] -= __float2half(learning_rate[0]) * gradient[i] / SqrtFunc(accumulation[i]);  (in ApplyAdagradKernel())
  62   const float *learning_rate,  (in ApplyAdagradKernel(), argument)
  70   variable[i] -= learning_rate[0] * __half2float(gradient[i]) / SqrtFunc(accumulation[i]);  (in ApplyAdagradKernel())
  77   const half *learning_rate,  (in ApplyAdagradKernel(), argument)
  85   variable[i] -= __half2float(learning_rate[0]) * gradient[i] / SqrtFunc(accumulation[i]);  (in ApplyAdagradKernel())
  92   const float *learning_rate,  (in ApplyAdagradKernel(), argument)
  100  …variable[i] -= __float2half(learning_rate[0]) * __float2half(gradient[i]) / SqrtFunc(accumulation[…  (in ApplyAdagradKernel())
  [all …]

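All of the ApplyAdagradKernel specializations above implement the same update for different float/half input combinations. NumPy sketch of that update; the accumulation step (`accumulation += gradient ** 2` when slots are updated) is not among the hits and is stated here as the conventional Adagrad rule:

```python
import numpy as np

def adagrad_step(variable, accumulation, gradient, learning_rate, update_slots=True):
    if update_slots:
        # Conventional Adagrad accumulator update (assumption, not shown above).
        accumulation = accumulation + gradient ** 2
    # Matches the visible kernel line: var -= lr * grad / sqrt(accum).
    variable = variable - learning_rate * gradient / np.sqrt(accumulation)
    return variable, accumulation
```
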
D | adam_impl.cu |
  31  … const T *learning_rate, const T *beta1, const T *beta2, const T *epsilon, T *variable,  (in ApplyAdamKernel(), argument)
  34  …const T new_learning_rate = learning_rate[0] * SqrtFunc(one - beta2_power[0]) / (one - beta1_power…  (in ApplyAdamKernel())
  44  …obal__ void AdamWeightDecayKernel(const size_t size, const T *gradient, const float *learning_rate,  (in AdamWeightDecayKernel(), argument)
  52  variable[i] -= learning_rate[0] * update;  (in AdamWeightDecayKernel())
  58  …l__ void AdamWeightDecayKernel(const size_t size, const half *gradient, const float *learning_rate,  (in AdamWeightDecayKernel(), argument)
  66  variable[i] -= __float2half(learning_rate[0]) * update;  (in AdamWeightDecayKernel())
  72  …size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, const T *learning_rate,  (in ApplyAdam(), argument)
  75  size, gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, variable, m, v);  (in ApplyAdam())
  78  void AdamWeightDecayOp(const size_t size, const T *gradient, const float *learning_rate, const floa…  (in AdamWeightDecayOp(), argument)
  81  …nel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, gradient, learning_rate, beta1, beta2,  (in AdamWeightDecayOp())
  [all …]

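A NumPy sketch of one ApplyAdam step. The bias-corrected step size on the first line matches the `new_learning_rate` expression visible at line 34; the moment updates and the epsilon placement follow the standard Adam formulation and are assumptions with respect to the exact kernel code:

```python
import numpy as np

def adam_step(variable, m, v, gradient, learning_rate,
              beta1_power, beta2_power, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # beta1_power / beta2_power are beta1**t and beta2**t for the current step t.
    new_learning_rate = learning_rate * np.sqrt(1.0 - beta2_power) / (1.0 - beta1_power)
    m = beta1 * m + (1.0 - beta1) * gradient
    v = beta2 * v + (1.0 - beta2) * gradient ** 2
    variable = variable - new_learning_rate * m / (np.sqrt(v) + epsilon)
    return variable, m, v
```

The AdamWeightDecayKernel hits (lines 44 to 66) presumably correspond to the decoupled-weight-decay variant, where `update` also carries a `weight_decay * variable` term before the `variable -= learning_rate * update` step shown at line 52.
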
D | rmsprop_impl.cu |
  22  __global__ void RmsPropKernel(const T* learning_rate, const T decay, const T momentum, const T epsi…  (in RmsPropKernel(), argument)
  26  …moment[i] = momentum * moment[i] + learning_rate[0] * rsqrt(mean_square[i] + epsilon) * gradients[…  (in RmsPropKernel())
  32  void RmsProp(const T* learning_rate, const T decay, const T momentum, const T epsilon,  (in RmsProp(), argument)
  34  …RmsPropKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(learning_rate, decay, momentum, e…  (in RmsProp())
  39  __global__ void RmsPropCenterKernel(const T* learning_rate, const T* decay, const T* momentum, cons…  (in RmsPropCenterKernel(), argument)
  45  moment[i] = momentum[0] * moment[i] + learning_rate[0] *  (in RmsPropCenterKernel())
  52  void RmsPropCenter(const T* learning_rate, const T* decay, const T* momentum, const T* epsilon, T* …  (in RmsPropCenter(), argument)
  55  …RmsPropCenterKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(learning_rate, decay, momen…  (in RmsPropCenter())
  61  void RmsProp(const float* learning_rate, const float decay, const float momentum, const float epsil…
  66  void RmsPropCenter(const float* learning_rate, const float* decay, const float* momentum, const flo…

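The non-centered kernel's moment update (line 26) has a direct NumPy counterpart, which the RMSProp tests further down also use as their reference. The `mean_square` update and the final variable step are not among the hits above and follow the standard formulation:

```python
import numpy as np

def rmsprop_step(variable, mean_square, moment, gradients,
                 learning_rate, decay, momentum, epsilon):
    # Standard running average of squared gradients (assumption, not shown above).
    mean_square = decay * mean_square + (1.0 - decay) * gradients ** 2
    # Matches the visible kernel line 26.
    moment = momentum * moment + learning_rate / np.sqrt(mean_square + epsilon) * gradients
    variable = variable - moment
    return variable, mean_square, moment
```

The centered variant additionally tracks a running mean of the gradients and subtracts its square from `mean_square` inside the square root.
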
D | momentum_impl.cuh |
  22  void MomentumUpdateVariable(const size_t size, T *variable, T *accumulation, const S *learning_rate…
  26  const T *learning_rate, const S *gradient, const T *momentum,
  30  … const T *learning_rate, const S *gradient, const T *momentum, cudaStream_t cuda_stream);
  32  …eMomentum(const size_t element_num, T *scale, T *variable, T *accumulation, const T *learning_rate,
  36  … T **scale, T **variable, T **accumulation, T **learning_rate, S **gradient,
  40  T **accumulation, T **learning_rate, S **gradient, T **momentum,

D | sparse_ftrl_impl.cu |
  53   const float learning_rate, const float l1_regularization,  (in SparseApplyFtrlKernel(), argument)
  57   const T learning_rate_val = static_cast<T>(learning_rate);  (in SparseApplyFtrlKernel())
  85   … const float learning_rate, const float l1_regularization, const float l2_regularization,  (in CalSparseApplyFtrl(), argument)
  89   …n_stride, learning_rate, l1_regularization, l2_regularization, learning_rate_power, variable, accu…  (in CalSparseApplyFtrl())
  93   const size_t n_stride, const float learning_rate,
  98   const size_t n_stride, const float learning_rate,
  103  const size_t n_stride, const float learning_rate,
  108  const size_t n_stride, const float learning_rate,

D | ftrl_impl.cu |
  50  __global__ void ApplyFtrlKernel(const size_t size, const T *gradient, const T *learning_rate,  (in ApplyFtrlKernel(), argument)
  60  const T sigma = (cur_accumulation_power - accumulation_power) / learning_rate[0];  (in ApplyFtrlKernel())
  65  (cur_accumulation_power / learning_rate[0] + two * l2_regularization[0]))  (in ApplyFtrlKernel())
  72  void ApplyFtrl(const size_t size, const T *gradient, const T *learning_rate, const T *l1_regulariza…  (in ApplyFtrl(), argument)
  75  …ApplyFtrlKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, gradient, learning_rate, …  (in ApplyFtrl())
  80  template void ApplyFtrl<float>(const size_t size, const float *gradient, const float *learning_rate,
  84  template void ApplyFtrl<half>(const size_t size, const half *gradient, const half *learning_rate,

/third_party/mindspore/mindspore/nn/

D | learning_rate_schedule.py |
  48   def _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, cls_name):  (argument)
  50   validator.check_positive_float(learning_rate, 'learning_rate', cls_name)
  51   validator.check_is_float(learning_rate, 'learning_rate', cls_name)
  107  def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):  (argument)
  109  _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
  110  self.learning_rate = learning_rate
  121  return self.learning_rate * self.pow(self.decay_rate, p)
  174  def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):  (argument)
  176  _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
  177  self.learning_rate = learning_rate
  [all …]

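Line 121 is the core of the exponential schedule: `learning_rate * decay_rate ** p`. A plain-Python sketch of what `ExponentialDecayLR` computes for a single step, assuming `p` is the step count divided by `decay_steps` and floored when `is_stair` is true:

```python
import math

def exponential_decay(learning_rate, decay_rate, decay_steps, global_step, is_stair=False):
    p = global_step / decay_steps
    if is_stair:
        p = math.floor(p)
    return learning_rate * decay_rate ** p

print(exponential_decay(0.1, 0.9, decay_steps=4, global_step=2))                 # ~0.09487
print(exponential_decay(0.1, 0.9, decay_steps=4, global_step=2, is_stair=True))  # 0.1
```
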
D | dynamic_lr.py |
  65   def _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair):  (argument)
  69   validator.check_positive_float(learning_rate, 'learning_rate')
  70   validator.check_is_float(learning_rate, 'learning_rate')
  76   def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_sta…  (argument)
  108  _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
  113  … lr.append(learning_rate * decay_rate ** math.floor(math.floor(i / step_per_epoch) / decay_epoch))
  115  lr.append(learning_rate * decay_rate ** (math.floor(i / step_per_epoch) / decay_epoch))
  119  def natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_sta…  (argument)
  151  _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
  159  …lr.append(learning_rate * math.e ** (-decay_rate * function(math.floor(i / step_per_epoch), decay_…
  [all …]

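Unlike the schedule cells, `dynamic_lr` functions return a precomputed list with one learning rate per training step. A sketch of `exponential_decay_lr` built from the two branches visible at lines 113 and 115:

```python
import math

def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch,
                         decay_epoch, is_stair=False):
    lr = []
    for i in range(total_step):
        p = math.floor(i / step_per_epoch) / decay_epoch
        if is_stair:
            p = math.floor(p)
        lr.append(learning_rate * decay_rate ** p)
    return lr

print(exponential_decay_lr(0.1, 0.9, total_step=6, step_per_epoch=2,
                           decay_epoch=1, is_stair=True))
# approximately [0.1, 0.1, 0.09, 0.09, 0.081, 0.081]
```
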
/third_party/mindspore/tests/ut/python/nn/

D | test_dynamic_lr.py |
  22   learning_rate = 0.1  (variable)
  77   … dr.polynomial_decay_lr(learning_rate, lr, total_step, step_per_epoch, decay_epoch, power)
  82   … dr.polynomial_decay_lr(learning_rate, lr, total_step, step_per_epoch, decay_epoch, power)
  87   dr.exponential_decay_lr(learning_rate, rate, total_step, step_per_epoch, decay_epoch)
  92   dr.exponential_decay_lr(learning_rate, rate, total_step, step_per_epoch, decay_epoch)
  97   … dr.exponential_decay_lr(learning_rate, decay_rate, total_step1, step_per_epoch, decay_epoch)
  103  …dr.polynomial_decay_lr(learning_rate, end_learning_rate, total_step1, step_per_epoch, decay_epoch,…
  108  … dr.exponential_decay_lr(learning_rate, decay_rate, total_step1, step_per_epoch, decay_epoch)
  114  …dr.polynomial_decay_lr(learning_rate, end_learning_rate, total_step1, step_per_epoch, decay_epoch,…
  119  … dr.exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch1, decay_epoch)
  [all …]

D | test_learning_rate_schedule.py |
  24  learning_rate = 0.1  (variable)
  55  lr_schedules.PolynomialDecayLR(learning_rate, lr, decay_steps, power)
  60  lr_schedules.PolynomialDecayLR(learning_rate, lr, decay_steps, power)
  65  lr_schedules.ExponentialDecayLR(learning_rate, rate, decay_steps)
  70  lr_schedules.ExponentialDecayLR(learning_rate, rate, decay_steps)
  75  lr_schedules.ExponentialDecayLR(learning_rate, decay_rate, decay_steps_e)
  81  lr_schedules.PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps_e, power)
  86  lr_schedules.ExponentialDecayLR(learning_rate, decay_rate, decay_steps_e)
  92  lr_schedules.PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps_e, power)
  97  lr_schedules.ExponentialDecayLR(learning_rate, decay_rate, decay_steps, is_stair)
  [all …]

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/

D | rmsprop_cpu_kernel.cc |
  31   float *learning_rate) {  (in LaunchRMSPropUnuseCenter(), argument)
  34   …task = [this, &variable, &mean_square, &moment, &gradients, &learning_rate](size_t start, size_t e…  (in LaunchRMSPropUnuseCenter())
  35   …SPropUnuseCenterFp32(variable, mean_square, moment, gradients, momentum_, learning_rate[0], decay_,  (in LaunchRMSPropUnuseCenter())
  39   …task = [this, &variable, &mean_square, &moment, &gradients, &learning_rate](size_t start, size_t e…  (in LaunchRMSPropUnuseCenter())
  42   …moment[i] = moment[i] * momentum_ + (gradients[i] * learning_rate[0]) / sqrt(mean_square[i] + epsi…  (in LaunchRMSPropUnuseCenter())
  52   … T *mean_gradients, float *momentum, float *learning_rate, float *decay,  (in LaunchRMSPropUseCenter(), argument)
  58   learning_rate[0], decay[0], epsilon[0], start, end);  (in LaunchRMSPropUseCenter())
  67   moment[i] = moment[i] * momentum[0] + (gradients[i] * learning_rate[0]) / sqrt(denom);  (in LaunchRMSPropUseCenter())
  105  float *learning_rate = reinterpret_cast<float *>(inputs[3]->addr);  (in Launch(), local)
  110  LaunchRMSPropUnuseCenter(variable, mean_square, moment, gradients, learning_rate);  (in Launch())
  [all …]

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/

D | md_iteration_gradient_descent_impl.cu |
  22  const float learning_rate) {  (in MD_Iteration_Gradient_Descent(), argument)
  25  crd[i].x = crd[i].x + learning_rate * frc[i].x;  (in MD_Iteration_Gradient_Descent())
  26  crd[i].y = crd[i].y + learning_rate * frc[i].y;  (in MD_Iteration_Gradient_Descent())
  27  crd[i].z = crd[i].z + learning_rate * frc[i].z;  (in MD_Iteration_Gradient_Descent())
  35  …IterationGradientDescent(const int atom_numbers, float *crd, float *frc, const float learning_rate,  (in MDIterationGradientDescent(), argument)
  40  atom_numbers, d_crd, d_frc, learning_rate);  (in MDIterationGradientDescent())

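This SPONGE kernel is plain gradient descent on atomic coordinates: each coordinate moves along its force vector, scaled by the learning rate (the force already points "downhill" in energy, hence the plus sign at lines 25 to 27). A NumPy equivalent:

```python
import numpy as np

def md_gradient_descent_step(crd, frc, learning_rate):
    """crd and frc have shape (atom_numbers, 3); returns the updated coordinates."""
    return crd + learning_rate * frc

crd = np.zeros((2, 3), np.float32)
frc = np.array([[1.0, 0.0, -1.0], [0.5, 0.5, 0.5]], np.float32)
print(md_gradient_descent_step(crd, frc, learning_rate=1e-3))
```
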
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32_grad/

D | sgd.cc |
  33   int DoSgd(float *weight, float *accumulate, const float *gradient, float learning_rate, float dampe…  (in DoSgd(), argument)
  39   weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate;  (in DoSgd())
  44   weight[i] -= accumulate[i] * learning_rate;  (in DoSgd())
  49   weight[i] -= gradient[i] * learning_rate;  (in DoSgd())
  55   int DoSgdInit(float *weight, float *accumulate, float *gradient, float *stat, float learning_rate, …  (in DoSgdInit(), argument)
  60   weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate;  (in DoSgdInit())
  64   weight[i] -= accumulate[i] * learning_rate;  (in DoSgdInit())
  76   float learning_rate = lr_;  (in Execute(), local)
  89   …DoSgd(weight, accumulate, gradient, learning_rate, sgd_param_->dampening_, moment, sgd_param_->use…  (in Execute())
  100  float learning_rate = lr_;  (in ExecuteInit(), local)
  [all …]

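`DoSgd` shows three weight updates: Nesterov momentum (line 39), plain momentum (line 44), and momentum-free SGD (line 49). A Python sketch of those branches; the accumulator update with dampening is not among the hits and is stated here as the usual SGD-with-momentum rule:

```python
def sgd_step(weight, accumulate, gradient, learning_rate, dampening, moment,
             use_nesterov=False):
    if moment > 0.0:
        # Conventional momentum buffer update (assumption, not shown above).
        accumulate = accumulate * moment + (1.0 - dampening) * gradient
        if use_nesterov:
            weight = weight - (accumulate * moment + gradient) * learning_rate
        else:
            weight = weight - accumulate * learning_rate
    else:
        weight = weight - gradient * learning_rate
    return weight, accumulate

print(sgd_step(1.0, 0.0, 0.5, learning_rate=0.1, dampening=0.0, moment=0.9))
```
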
D | apply_momentum.cc |
  32   static int DoApplyMomentum(float *weight, float *accumulate, float learning_rate, const float *grad…  (in DoApplyMomentum(), argument)
  37   weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate;  (in DoApplyMomentum())
  42   weight[i] -= accumulate[i] * learning_rate;  (in DoApplyMomentum())
  54   float learning_rate = lr_;  (in Execute(), local)
  68   …DoApplyMomentum(weight, accumulate, learning_rate, gradient, moment, apply_momentum_param_->use_ne…  (in Execute())
  124  float learning_rate = lr_;  (in OptimizerStep(), local)
  133  …DoApplyMomentum(weight, accumulate, learning_rate, grad_sum_, moment, apply_momentum_param_->use_n…  (in OptimizerStep())

/third_party/mindspore/tests/st/ops/gpu/

D | test_rmsprop.py |
  67   learning_rate, decay, momentum, epsilon):  (argument)
  69   moment = momentum * moment + learning_rate / np.sqrt(mean_square + epsilon) * gradients
  75   learning_rate, decay, momentum, epsilon):  (argument)
  78   moment = momentum * moment + learning_rate / np.sqrt(
  88   learning_rate, decay, momentum, epsilon, centered = [0.5, 0.8, 0.9, 1e-3, True]
  111  learning_rate, decay, momentum, epsilon)
  112  …net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_…
  119  learning_rate, decay, momentum, epsilon)
  120  …net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradient…
  149  learning_rate, decay, momentum, epsilon, centered = [0.1, 0.3, 0.9, 1.0, False]
  [all …]

/third_party/mindspore/tests/st/ops/cpu/

D | test_rmsprop.py |
  67   learning_rate, decay, momentum, epsilon):  (argument)
  69   moment = momentum * moment + learning_rate / np.sqrt(mean_square + epsilon) * gradients
  75   learning_rate, decay, momentum, epsilon):  (argument)
  78   moment = momentum * moment + learning_rate / np.sqrt(
  88   learning_rate, decay, momentum, epsilon, centered = [0.5, 0.8, 0.9, 1e-3, True]
  111  learning_rate, decay, momentum, epsilon)
  112  …net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_…
  119  learning_rate, decay, momentum, epsilon)
  120  …net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradient…
  149  learning_rate, decay, momentum, epsilon, centered = [0.1, 0.3, 0.9, 1.0, False]
  [all …]

/third_party/mindspore/tests/ut/python/nn/optim/

D | test_adam.py |
  74   AdamWeightDecay(net.trainable_params(), learning_rate=0.1)
  85   optimizer = AdamWeightDecay(net.trainable_params(), learning_rate=0.1)
  100  optimizer = Adam(net.trainable_params(), learning_rate=0.1, weight_decay=0.9)
  114  optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9)
  127  optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9)
  146  optimizer = nn.Adam(group_params, learning_rate=0.1)
  165  optimizer = nn.Adam(group_params, learning_rate=schedule_lr)
  183  optimizer = nn.AdamWeightDecay(group_params, learning_rate=schedule_lr)
  201  optimizer = nn.AdamOffload(group_params, learning_rate=schedule_lr)
  209  AdamWeightDecay(net.get_parameters(), beta1=1.0, learning_rate=0.1)
  [all …]

/third_party/mindspore/tests/ut/python/optimizer/

D | test_optimizer_with_parameter_groups.py |
  68   opt = Momentum(group_params, learning_rate=default_lr, momentum=0.9)
  73   for lr, param, order_param in zip(opt.learning_rate, opt.parameters, net.trainable_params()):
  101  opt = Momentum(group_params, learning_rate=default_lr, momentum=0.9)
  105  for lr, param, order_param in zip(opt.learning_rate, opt.parameters, net.trainable_params()):
  107  assert np.all(lr.learning_rate.data.asnumpy() == \
  110  assert np.all(lr.learning_rate.data.asnumpy() == \
  134  opt = RMSProp(group_params, learning_rate=default_lr)
  137  for lr, param in zip(opt.learning_rate, opt.parameters):
  139  assert np.all(lr.learning_rate.data.asnumpy() == \
  142  assert np.all(lr.learning_rate.data.asnumpy() == \
  [all …]

/third_party/mindspore/tests/st/fl/cross_silo_faster_rcnn/src/

D | lr_schedule.py |
  20  learning_rate = float(init_lr) + lr_inc * current_step
  21  return learning_rate
  25  learning_rate = (1 + math.cos(base * math.pi)) / 2 * base_lr
  26  return learning_rate

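The two helpers here are a linear warmup ramp (line 20) and a cosine decay (line 25). A sketch of both; `lr_inc` and `base` are taken from the hits above, and here `lr_inc` is assumed to be the per-step increment while `base` is assumed to be the fraction of decay progress in [0, 1]:

```python
import math

def linear_warmup_lr(init_lr, lr_inc, current_step):
    # Ramp linearly from init_lr by lr_inc per step.
    return float(init_lr) + lr_inc * current_step

def cosine_decay_lr(base_lr, base):
    # base = 0 returns base_lr, base = 1 returns 0.
    return (1 + math.cos(base * math.pi)) / 2 * base_lr

print(linear_warmup_lr(0.0, 0.001, current_step=50))   # 0.05
print(cosine_decay_lr(0.02, base=0.5))                 # 0.01
```
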