Searched refs:learning_rate (Results 1 – 25 of 275) sorted by relevance

/third_party/mindspore/mindspore/nn/optim/
optimizer.py
129 def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0): argument
152 learning_rate = self._preprocess_single_lr(learning_rate)
159 … self._init_group_params(parameters, learning_rate, weight_decay, self.grad_centralization)
167 self.learning_rate = CellList(self.group_lr, auto_prefix=False) if self.dynamic_lr \
170 self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')
368 def _preprocess_single_lr(self, learning_rate): argument
370 if isinstance(learning_rate, (float, int)):
371 learning_rate = float(learning_rate)
372 validator.check_non_negative_float(learning_rate, "learning rate", self.cls_name)
373 return learning_rate
[all …]
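The optimizer.py hits above show a scalar learning rate being coerced to float and checked non-negative before parameter groups are built. A minimal standalone sketch of that check (a hypothetical helper; the real Optimizer._preprocess_single_lr also accepts Tensor and schedule inputs, which the truncated results do not show):

```python
# Hypothetical standalone sketch of the scalar learning-rate handling visible above.
def preprocess_single_lr(learning_rate):
    if isinstance(learning_rate, (float, int)):
        learning_rate = float(learning_rate)
        if learning_rate < 0.0:
            raise ValueError("learning rate must be non-negative, got {}".format(learning_rate))
    return learning_rate

print(preprocess_single_lr(1))  # 1.0
```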
adafactor.py
30 def _get_lr(step, rms, learning_rate, relative_step, warmup_init, scale_parameter, eps): argument
32 rel_step_sz = learning_rate
71 learning_rate, step, grad, param, argument
105 …learning_rate_update = _get_lr(step, rms, learning_rate, relative_step, warmup_init, scale_paramet…
106 …learning_rate_update = F.assign(learning_rate, F.cast(learning_rate_update, F.dtype(learning_rate)…
108 learning_rate_update = learning_rate * 1.0
290 learning_rate=None, argument
302 if learning_rate is not None and relative_step:
303 raise ValueError("Cannot combine manual lr and relative_step options", learning_rate)
306 if learning_rate is None and not relative_step:
[all …]
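The adafactor.py hits show _get_lr choosing between a user-supplied learning_rate and a relative step size, and raising an error when both are requested. The body is truncated above, so the sketch below follows the Adafactor paper's convention for the relative step; the constants are assumptions, not values read from adafactor.py:

```python
import math

# Hedged sketch of an Adafactor-style step size (paper convention, assumed).
def get_lr(step, rms, learning_rate, relative_step, warmup_init, scale_parameter, eps):
    if relative_step:
        min_step = 1e-6 * step if warmup_init else 1e-2
        rel_step_sz = min(min_step, 1.0 / math.sqrt(step))
    else:
        rel_step_sz = learning_rate
    # optionally scale the step by the parameter's RMS
    param_scale = max(eps, rms) if scale_parameter else 1.0
    return param_scale * rel_step_sz
```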
ftrl.py
29 def _tensor_run_opt_with_sparse(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate, linear, argument
46 def _tensor_run_opt(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate, linear, argument
52 success = F.depend(success, pull(push((gradient, learning_rate, l1, l2, lr_power),
55 …success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power))
197 … def __init__(self, params, initial_accum=0.1, learning_rate=0.001, lr_power=-0.5, l1=0.0, l2=0.0, argument
199 super(FTRL, self).__init__(learning_rate, params, weight_decay, loss_scale=loss_scale)
207 self.lr = learning_rate
213 … self.sparse_opt = P.SparseApplyFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking)
217 self._ps_push.add_prim_attr("lr", learning_rate)
lars.py
28 def _tensor_run_opt(lars, learning_rate, weight_decay, gradient, weight, decay_flag, lars_flag): argument
34 … grad_t = lars(weight, gradient, w_square_sum, grad_square_sum, weight_decay, learning_rate)
37 grad_t = lars(weight, gradient, w_square_sum, grad_square_sum, num_zero, learning_rate)
114 self.learning_rate = Parameter(Tensor(0.0, dtype=mstype.float32), name="fake_lr")
124 self.origin_learning_rate = optimizer.learning_rate
147 for learning_rate in self.origin_learning_rate:
148 current_dynamic_lr = learning_rate(self.global_step)
160 lr = self.learning_rate
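lars.py wraps another optimizer and rescales its learning rate per layer from the weight and gradient norms (w_square_sum, grad_square_sum above). The LARSUpdate primitive itself is not visible in these results, so the sketch below is the generic LARS trust-ratio rule, an assumption rather than the exact MindSpore semantics:

```python
import numpy as np

# Generic LARS trust-ratio scaling (assumed; P.LARSUpdate internals are truncated above).
def lars_update(weight, gradient, weight_decay, learning_rate, coefficient=0.001, eps=1e-9):
    w_norm = np.sqrt(np.sum(weight ** 2))
    g_norm = np.sqrt(np.sum(gradient ** 2))
    trust_ratio = 1.0
    if w_norm > 0.0 and g_norm > 0.0:
        trust_ratio = coefficient * w_norm / (g_norm + weight_decay * w_norm + eps)
    # the scaled gradient is what the wrapped optimizer then applies with learning_rate
    return trust_ratio * learning_rate * (gradient + weight_decay * weight)
```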
momentum.py
28 def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, moment, ps_parameter, cache… argument
34 shapes = (op_shape(learning_rate), op_shape(gradient), op_shape(momentum))
35 … success = F.depend(True, _ps_pull(_ps_push((learning_rate, gradient, momentum), shapes), weight))
37 success = F.depend(True, opt(weight, moment, learning_rate, gradient, momentum))
151 …def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0, use_nesterov… argument
152 super(Momentum, self).__init__(learning_rate, params, weight_decay, loss_scale)
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
momentum_impl.cu
19 …mentumUpdateVariableKernel(const size_t size, T *variable, T *accumulation, const S *learning_rate, in MomentumUpdateVariableKernel() argument
24 … variable[i] -= gradient[i] * learning_rate[0] + accumulation[i] * momentum[0] * learning_rate[0]; in MomentumUpdateVariableKernel()
29 variable[i] -= learning_rate[0] * accumulation[i]; in MomentumUpdateVariableKernel()
35 … const float *learning_rate, const half *gradient, const float *momentum, in MomentumUpdateVariableKernel() argument
40 variable[i] -= gradient[i] * __float2half(learning_rate[0]) + in MomentumUpdateVariableKernel()
41 accumulation[i] * __float2half(momentum[0]) * __float2half(learning_rate[0]); in MomentumUpdateVariableKernel()
46 variable[i] -= __float2half(learning_rate[0]) * accumulation[i]; in MomentumUpdateVariableKernel()
52 … const float *learning_rate, const half *gradient, const float *momentum, in MomentumUpdateVariableKernel() argument
57 …variable[i] -= __half2float(gradient[i]) * learning_rate[0] + accumulation[i] * momentum[0] * lear… in MomentumUpdateVariableKernel()
62 variable[i] -= learning_rate[0] * accumulation[i]; in MomentumUpdateVariableKernel()
[all …]
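MomentumUpdateVariableKernel applies the same element-wise rule in fp32 and fp16 (the half variants only add casts). A NumPy sketch of that rule; the accumulation update is not visible above and is assumed to be the usual accum = momentum * accum + gradient:

```python
import numpy as np

# NumPy sketch of the element-wise momentum update shown in the kernel above.
def momentum_update(variable, accumulation, gradient, learning_rate, momentum, use_nesterov=False):
    accumulation[:] = momentum * accumulation + gradient  # assumed, truncated in the results
    if use_nesterov:
        variable -= gradient * learning_rate + accumulation * momentum * learning_rate
    else:
        variable -= learning_rate * accumulation
    return variable, accumulation
```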
adagrad_impl.cu
32 const S *learning_rate, in ApplyAdagradKernel() argument
40 variable[i] -= learning_rate[0] * gradient[i] / SqrtFunc(accumulation[i]); in ApplyAdagradKernel()
47 const float *learning_rate, in ApplyAdagradKernel() argument
55 variable[i] -= __float2half(learning_rate[0]) * gradient[i] / SqrtFunc(accumulation[i]); in ApplyAdagradKernel()
62 const float *learning_rate, in ApplyAdagradKernel() argument
70 variable[i] -= learning_rate[0] * __half2float(gradient[i]) / SqrtFunc(accumulation[i]); in ApplyAdagradKernel()
77 const half *learning_rate, in ApplyAdagradKernel() argument
85 variable[i] -= __half2float(learning_rate[0]) * gradient[i] / SqrtFunc(accumulation[i]); in ApplyAdagradKernel()
92 const float *learning_rate, in ApplyAdagradKernel() argument
100 …variable[i] -= __float2half(learning_rate[0]) * __float2half(gradient[i]) / SqrtFunc(accumulation[… in ApplyAdagradKernel()
[all …]
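ApplyAdagradKernel divides the scaled gradient by the square root of the accumulated squared gradients. A NumPy sketch; the accumulator update itself is truncated above and assumed:

```python
import numpy as np

# NumPy sketch of the Adagrad step from ApplyAdagradKernel.
def adagrad_update(variable, accumulation, gradient, learning_rate):
    accumulation += gradient ** 2  # assumed accumulator update
    variable -= learning_rate * gradient / np.sqrt(accumulation)
    return variable, accumulation
```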
adam_impl.cu
31 … const T *learning_rate, const T *beta1, const T *beta2, const T *epsilon, T *variable, in ApplyAdamKernel() argument
34 …const T new_learning_rate = learning_rate[0] * SqrtFunc(one - beta2_power[0]) / (one - beta1_power… in ApplyAdamKernel()
44 …obal__ void AdamWeightDecayKernel(const size_t size, const T *gradient, const float *learning_rate, in AdamWeightDecayKernel() argument
52 variable[i] -= learning_rate[0] * update; in AdamWeightDecayKernel()
58 …l__ void AdamWeightDecayKernel(const size_t size, const half *gradient, const float *learning_rate, in AdamWeightDecayKernel() argument
66 variable[i] -= __float2half(learning_rate[0]) * update; in AdamWeightDecayKernel()
72 …size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, const T *learning_rate, in ApplyAdam() argument
75 size, gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, variable, m, v); in ApplyAdam()
78 void AdamWeightDecayOp(const size_t size, const T *gradient, const float *learning_rate, const floa… in AdamWeightDecayOp() argument
81 …nel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, gradient, learning_rate, beta1, beta2, in AdamWeightDecayOp()
[all …]
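ApplyAdamKernel folds the bias correction into the step size: new_lr = learning_rate * sqrt(1 - beta2_power) / (1 - beta1_power). A NumPy sketch of the whole step; the moment updates are the standard Adam rules and are assumed, and the kernel receives beta1_power/beta2_power as tensors rather than computing beta ** step from a counter as done here:

```python
import numpy as np

# NumPy sketch of an Adam step using the bias-corrected learning rate shown above.
def adam_update(variable, m, v, gradient, learning_rate, beta1, beta2, epsilon, step):
    new_lr = learning_rate * np.sqrt(1.0 - beta2 ** step) / (1.0 - beta1 ** step)
    m[:] = beta1 * m + (1.0 - beta1) * gradient        # assumed
    v[:] = beta2 * v + (1.0 - beta2) * gradient ** 2   # assumed
    variable -= new_lr * m / (np.sqrt(v) + epsilon)
    return variable, m, v
```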
rmsprop_impl.cu
22 __global__ void RmsPropKernel(const T* learning_rate, const T decay, const T momentum, const T epsi… in RmsPropKernel() argument
26 …moment[i] = momentum * moment[i] + learning_rate[0] * rsqrt(mean_square[i] + epsilon) * gradients[… in RmsPropKernel()
32 void RmsProp(const T* learning_rate, const T decay, const T momentum, const T epsilon, in RmsProp() argument
34 …RmsPropKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(learning_rate, decay, momentum, e… in RmsProp()
39 __global__ void RmsPropCenterKernel(const T* learning_rate, const T* decay, const T* momentum, cons… in RmsPropCenterKernel() argument
45 moment[i] = momentum[0] * moment[i] + learning_rate[0] * in RmsPropCenterKernel()
52 void RmsPropCenter(const T* learning_rate, const T* decay, const T* momentum, const T* epsilon, T* … in RmsPropCenter() argument
55 …RmsPropCenterKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(learning_rate, decay, momen… in RmsPropCenter()
61 void RmsProp(const float* learning_rate, const float decay, const float momentum, const float epsil…
66 void RmsPropCenter(const float* learning_rate, const float* decay, const float* momentum, const flo…
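RmsPropKernel folds the learning rate into the moment: moment = momentum * moment + learning_rate * gradients / sqrt(mean_square + epsilon). A NumPy sketch of the non-centered variant; the mean_square and variable updates are truncated above and assumed from the usual RMSProp rule, which the GPU/CPU tests further down also use:

```python
import numpy as np

# NumPy sketch of the non-centered RMSProp step from RmsPropKernel.
def rmsprop_update(variable, mean_square, moment, gradients, learning_rate, decay, momentum, epsilon):
    mean_square[:] = decay * mean_square + (1.0 - decay) * gradients ** 2  # assumed
    moment[:] = momentum * moment + learning_rate * gradients / np.sqrt(mean_square + epsilon)
    variable -= moment                                                     # assumed
    return variable, mean_square, moment
```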
momentum_impl.cuh
22 void MomentumUpdateVariable(const size_t size, T *variable, T *accumulation, const S *learning_rate
26 const T *learning_rate, const S *gradient, const T *momentum,
30 … const T *learning_rate, const S *gradient, const T *momentum, cudaStream_t cuda_stream);
32 …eMomentum(const size_t element_num, T *scale, T *variable, T *accumulation, const T *learning_rate,
36 … T **scale, T **variable, T **accumulation, T **learning_rate, S **gradient,
40 T **accumulation, T **learning_rate, S **gradient, T **momentum,
sparse_ftrl_impl.cu
53 const float learning_rate, const float l1_regularization, in SparseApplyFtrlKernel() argument
57 const T learning_rate_val = static_cast<T>(learning_rate); in SparseApplyFtrlKernel()
85 … const float learning_rate, const float l1_regularization, const float l2_regularization, in CalSparseApplyFtrl() argument
89 …n_stride, learning_rate, l1_regularization, l2_regularization, learning_rate_power, variable, accu… in CalSparseApplyFtrl()
93 const size_t n_stride, const float learning_rate,
98 const size_t n_stride, const float learning_rate,
103 const size_t n_stride, const float learning_rate,
108 const size_t n_stride, const float learning_rate,
ftrl_impl.cu
50 __global__ void ApplyFtrlKernel(const size_t size, const T *gradient, const T *learning_rate, in ApplyFtrlKernel() argument
60 const T sigma = (cur_accumulation_power - accumulation_power) / learning_rate[0]; in ApplyFtrlKernel()
65 (cur_accumulation_power / learning_rate[0] + two * l2_regularization[0])) in ApplyFtrlKernel()
72 void ApplyFtrl(const size_t size, const T *gradient, const T *learning_rate, const T *l1_regulariza… in ApplyFtrl() argument
75 …ApplyFtrlKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, gradient, learning_rate, … in ApplyFtrl()
80 template void ApplyFtrl<float>(const size_t size, const float *gradient, const float *learning_rate,
84 template void ApplyFtrl<half>(const size_t size, const half *gradient, const half *learning_rate,
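ApplyFtrlKernel shows two pieces of the FTRL-Proximal update: sigma = (new_accum_power - accum_power) / learning_rate, and the quadratic term new_accum_power / learning_rate + 2 * l2. The rest of the kernel is truncated, so the sketch below fills in the textbook FTRL rule as an assumption:

```python
import numpy as np

# NumPy sketch of an FTRL-Proximal step consistent with the fragments above;
# the lr_power handling and the final weight formula are the standard rule, assumed.
def ftrl_update(weight, accum, linear, gradient, learning_rate, l1, l2, lr_power=-0.5):
    new_accum = accum + gradient ** 2
    sigma = (new_accum ** -lr_power - accum ** -lr_power) / learning_rate
    linear += gradient - sigma * weight
    quadratic = new_accum ** -lr_power / learning_rate + 2.0 * l2
    weight[:] = np.where(np.abs(linear) > l1,
                         (np.sign(linear) * l1 - linear) / quadratic, 0.0)
    accum[:] = new_accum
    return weight, accum, linear
```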
/third_party/mindspore/mindspore/nn/
learning_rate_schedule.py
48 def _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, cls_name): argument
50 validator.check_positive_float(learning_rate, 'learning_rate', cls_name)
51 validator.check_is_float(learning_rate, 'learning_rate', cls_name)
107 def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False): argument
109 _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
110 self.learning_rate = learning_rate
121 return self.learning_rate * self.pow(self.decay_rate, p)
174 def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False): argument
176 _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
177 self.learning_rate = learning_rate
[all …]
dynamic_lr.py
65 def _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair): argument
69 validator.check_positive_float(learning_rate, 'learning_rate')
70 validator.check_is_float(learning_rate, 'learning_rate')
76 def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_sta… argument
108 _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
113 … lr.append(learning_rate * decay_rate ** math.floor(math.floor(i / step_per_epoch) / decay_epoch))
115 lr.append(learning_rate * decay_rate ** (math.floor(i / step_per_epoch) / decay_epoch))
119 def natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_sta… argument
151 _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
159 …lr.append(learning_rate * math.e ** (-decay_rate * function(math.floor(i / step_per_epoch), decay_…
[all …]
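dynamic_lr.py builds the whole schedule as a Python list, one value per global step. The exponential_decay_lr lines above reduce to the sketch below, which mirrors the visible lines; natural_exp_decay_lr only swaps the decay term for e ** (-decay_rate * p):

```python
import math

# Sketch of the list-building pattern in exponential_decay_lr.
def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair=False):
    lr = []
    for i in range(total_step):
        p = math.floor(i / step_per_epoch) / decay_epoch
        if is_stair:
            p = math.floor(p)
        lr.append(learning_rate * decay_rate ** p)
    return lr

print(exponential_decay_lr(0.1, 0.9, total_step=6, step_per_epoch=2, decay_epoch=1, is_stair=True))
# [0.1, 0.1, 0.09, 0.09, 0.081, 0.081]
```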
/third_party/mindspore/tests/ut/python/nn/
test_dynamic_lr.py
22 learning_rate = 0.1 variable
77 … dr.polynomial_decay_lr(learning_rate, lr, total_step, step_per_epoch, decay_epoch, power)
82 … dr.polynomial_decay_lr(learning_rate, lr, total_step, step_per_epoch, decay_epoch, power)
87 dr.exponential_decay_lr(learning_rate, rate, total_step, step_per_epoch, decay_epoch)
92 dr.exponential_decay_lr(learning_rate, rate, total_step, step_per_epoch, decay_epoch)
97 … dr.exponential_decay_lr(learning_rate, decay_rate, total_step1, step_per_epoch, decay_epoch)
103 …dr.polynomial_decay_lr(learning_rate, end_learning_rate, total_step1, step_per_epoch, decay_epoch,…
108 … dr.exponential_decay_lr(learning_rate, decay_rate, total_step1, step_per_epoch, decay_epoch)
114 …dr.polynomial_decay_lr(learning_rate, end_learning_rate, total_step1, step_per_epoch, decay_epoch,…
119 … dr.exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch1, decay_epoch)
[all …]
test_learning_rate_schedule.py
24 learning_rate = 0.1 variable
55 lr_schedules.PolynomialDecayLR(learning_rate, lr, decay_steps, power)
60 lr_schedules.PolynomialDecayLR(learning_rate, lr, decay_steps, power)
65 lr_schedules.ExponentialDecayLR(learning_rate, rate, decay_steps)
70 lr_schedules.ExponentialDecayLR(learning_rate, rate, decay_steps)
75 lr_schedules.ExponentialDecayLR(learning_rate, decay_rate, decay_steps_e)
81 lr_schedules.PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps_e, power)
86 lr_schedules.ExponentialDecayLR(learning_rate, decay_rate, decay_steps_e)
92 lr_schedules.PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps_e, power)
97 lr_schedules.ExponentialDecayLR(learning_rate, decay_rate, decay_steps, is_stair)
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/
rmsprop_cpu_kernel.cc
31 float *learning_rate) { in LaunchRMSPropUnuseCenter() argument
34 …task = [this, &variable, &mean_square, &moment, &gradients, &learning_rate](size_t start, size_t e… in LaunchRMSPropUnuseCenter()
35 …SPropUnuseCenterFp32(variable, mean_square, moment, gradients, momentum_, learning_rate[0], decay_, in LaunchRMSPropUnuseCenter()
39 …task = [this, &variable, &mean_square, &moment, &gradients, &learning_rate](size_t start, size_t e… in LaunchRMSPropUnuseCenter()
42 …moment[i] = moment[i] * momentum_ + (gradients[i] * learning_rate[0]) / sqrt(mean_square[i] + epsi… in LaunchRMSPropUnuseCenter()
52 … T *mean_gradients, float *momentum, float *learning_rate, float *decay, in LaunchRMSPropUseCenter() argument
58 learning_rate[0], decay[0], epsilon[0], start, end); in LaunchRMSPropUseCenter()
67 moment[i] = moment[i] * momentum[0] + (gradients[i] * learning_rate[0]) / sqrt(denom); in LaunchRMSPropUseCenter()
105 float *learning_rate = reinterpret_cast<float *>(inputs[3]->addr); in Launch() local
110 LaunchRMSPropUnuseCenter(variable, mean_square, moment, gradients, learning_rate); in Launch()
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/
md_iteration_gradient_descent_impl.cu
22 const float learning_rate) { in MD_Iteration_Gradient_Descent() argument
25 crd[i].x = crd[i].x + learning_rate * frc[i].x; in MD_Iteration_Gradient_Descent()
26 crd[i].y = crd[i].y + learning_rate * frc[i].y; in MD_Iteration_Gradient_Descent()
27 crd[i].z = crd[i].z + learning_rate * frc[i].z; in MD_Iteration_Gradient_Descent()
35 …IterationGradientDescent(const int atom_numbers, float *crd, float *frc, const float learning_rate, in MDIterationGradientDescent() argument
40 atom_numbers, d_crd, d_frc, learning_rate); in MDIterationGradientDescent()
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32_grad/
sgd.cc
33 int DoSgd(float *weight, float *accumulate, const float *gradient, float learning_rate, float dampe… in DoSgd() argument
39 weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate; in DoSgd()
44 weight[i] -= accumulate[i] * learning_rate; in DoSgd()
49 weight[i] -= gradient[i] * learning_rate; in DoSgd()
55 int DoSgdInit(float *weight, float *accumulate, float *gradient, float *stat, float learning_rate, … in DoSgdInit() argument
60 weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate; in DoSgdInit()
64 weight[i] -= accumulate[i] * learning_rate; in DoSgdInit()
76 float learning_rate = lr_; in Execute() local
89 …DoSgd(weight, accumulate, gradient, learning_rate, sgd_param_->dampening_, moment, sgd_param_->use… in Execute()
100 float learning_rate = lr_; in ExecuteInit() local
[all …]
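DoSgd has three branches: Nesterov, plain momentum, and momentum-free. A NumPy sketch of those branches; the accumulate update with the dampening factor is not visible above and is assumed:

```python
import numpy as np

# NumPy sketch of the three update branches visible in DoSgd.
def sgd_update(weight, accumulate, gradient, learning_rate, dampening, moment, use_nesterov):
    if moment > 0.0:
        accumulate[:] = accumulate * moment + (1.0 - dampening) * gradient  # assumed
        if use_nesterov:
            weight -= (accumulate * moment + gradient) * learning_rate
        else:
            weight -= accumulate * learning_rate
    else:
        weight -= gradient * learning_rate
    return weight, accumulate
```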
apply_momentum.cc
32 static int DoApplyMomentum(float *weight, float *accumulate, float learning_rate, const float *grad… in DoApplyMomentum() argument
37 weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate; in DoApplyMomentum()
42 weight[i] -= accumulate[i] * learning_rate; in DoApplyMomentum()
54 float learning_rate = lr_; in Execute() local
68 …DoApplyMomentum(weight, accumulate, learning_rate, gradient, moment, apply_momentum_param_->use_ne… in Execute()
124 float learning_rate = lr_; in OptimizerStep() local
133 …DoApplyMomentum(weight, accumulate, learning_rate, grad_sum_, moment, apply_momentum_param_->use_n… in OptimizerStep()
/third_party/mindspore/tests/st/ops/gpu/
test_rmsprop.py
67 learning_rate, decay, momentum, epsilon): argument
69 moment = momentum * moment + learning_rate / np.sqrt(mean_square + epsilon) * gradients
75 learning_rate, decay, momentum, epsilon): argument
78 moment = momentum * moment + learning_rate / np.sqrt(
88 learning_rate, decay, momentum, epsilon, centered = [0.5, 0.8, 0.9, 1e-3, True]
111 learning_rate, decay, momentum, epsilon)
112 …net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_…
119 learning_rate, decay, momentum, epsilon)
120 …net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradient…
149 learning_rate, decay, momentum, epsilon, centered = [0.1, 0.3, 0.9, 1.0, False]
[all …]
/third_party/mindspore/tests/st/ops/cpu/
test_rmsprop.py
67 learning_rate, decay, momentum, epsilon): argument
69 moment = momentum * moment + learning_rate / np.sqrt(mean_square + epsilon) * gradients
75 learning_rate, decay, momentum, epsilon): argument
78 moment = momentum * moment + learning_rate / np.sqrt(
88 learning_rate, decay, momentum, epsilon, centered = [0.5, 0.8, 0.9, 1e-3, True]
111 learning_rate, decay, momentum, epsilon)
112 …net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_…
119 learning_rate, decay, momentum, epsilon)
120 …net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradient…
149 learning_rate, decay, momentum, epsilon, centered = [0.1, 0.3, 0.9, 1.0, False]
[all …]
/third_party/mindspore/tests/ut/python/nn/optim/
test_adam.py
74 AdamWeightDecay(net.trainable_params(), learning_rate=0.1)
85 optimizer = AdamWeightDecay(net.trainable_params(), learning_rate=0.1)
100 optimizer = Adam(net.trainable_params(), learning_rate=0.1, weight_decay=0.9)
114 optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9)
127 optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9)
146 optimizer = nn.Adam(group_params, learning_rate=0.1)
165 optimizer = nn.Adam(group_params, learning_rate=schedule_lr)
183 optimizer = nn.AdamWeightDecay(group_params, learning_rate=schedule_lr)
201 optimizer = nn.AdamOffload(group_params, learning_rate=schedule_lr)
209 AdamWeightDecay(net.get_parameters(), beta1=1.0, learning_rate=0.1)
[all …]
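The test_adam.py hits exercise grouped parameters, where each group can carry its own learning rate or schedule while the optimizer keeps a default rate for the rest. A hedged usage sketch; the tiny network and the 'conv' filter are illustrative, not taken from the test file:

```python
import mindspore.nn as nn

class TinyNet(nn.Cell):
    """Illustrative two-layer network, not from the test file."""
    def __init__(self):
        super(TinyNet, self).__init__()
        self.conv = nn.Conv2d(3, 8, 3)
        self.fc = nn.Dense(8, 10)

    def construct(self, x):
        return self.fc(self.conv(x).mean(axis=(2, 3)))

net = TinyNet()
conv_params = [p for p in net.trainable_params() if 'conv' in p.name]
other_params = [p for p in net.trainable_params() if 'conv' not in p.name]
group_params = [{'params': conv_params, 'lr': 0.01},   # group-specific learning rate
                {'params': other_params}]              # falls back to learning_rate below
optimizer = nn.Adam(group_params, learning_rate=0.1, weight_decay=0.9)
```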
/third_party/mindspore/tests/ut/python/optimizer/
test_optimizer_with_parameter_groups.py
68 opt = Momentum(group_params, learning_rate=default_lr, momentum=0.9)
73 for lr, param, order_param in zip(opt.learning_rate, opt.parameters, net.trainable_params()):
101 opt = Momentum(group_params, learning_rate=default_lr, momentum=0.9)
105 for lr, param, order_param in zip(opt.learning_rate, opt.parameters, net.trainable_params()):
107 assert np.all(lr.learning_rate.data.asnumpy() == \
110 assert np.all(lr.learning_rate.data.asnumpy() == \
134 opt = RMSProp(group_params, learning_rate=default_lr)
137 for lr, param in zip(opt.learning_rate, opt.parameters):
139 assert np.all(lr.learning_rate.data.asnumpy() == \
142 assert np.all(lr.learning_rate.data.asnumpy() == \
[all …]
/third_party/mindspore/tests/st/fl/cross_silo_faster_rcnn/src/
lr_schedule.py
20 learning_rate = float(init_lr) + lr_inc * current_step
21 return learning_rate
25 learning_rate = (1 + math.cos(base * math.pi)) / 2 * base_lr
26 return learning_rate
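lr_schedule.py combines a linear warm-up (init_lr plus lr_inc per step) with a cosine decay ((1 + cos(base * pi)) / 2 * base_lr). The definitions of lr_inc and base are not visible above; the sketch below assumes they are the warm-up slope and the post-warm-up progress ratio respectively:

```python
import math

# Sketch of a warm-up + cosine schedule consistent with the two visible formulas;
# warmup_steps and total_steps are assumed parameters.
def warmup_cosine_lr(init_lr, base_lr, warmup_steps, total_steps):
    lr = []
    lr_inc = (base_lr - init_lr) / warmup_steps  # assumed warm-up slope
    for step in range(total_steps):
        if step < warmup_steps:
            lr.append(float(init_lr) + lr_inc * step)
        else:
            base = (step - warmup_steps) / (total_steps - warmup_steps)  # assumed progress ratio
            lr.append((1 + math.cos(base * math.pi)) / 2 * base_lr)
    return lr
```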
