Home
last modified time | relevance | path

Searched defs:weight_decay (Results 1 – 25 of 32) sorted by relevance

12

/third_party/mindspore/mindspore/core/ops/
Dsgd.cc21 void SGD::Init(const float dampening, const float weight_decay, const bool nesterov) { in Init()
32 void SGD::set_weight_decay(const float weight_decay) { (void)AddAttr(kWeightDecay, MakeValue(weight… in set_weight_decay()
/third_party/mindspore/mindspore/ccsrc/backend/optimizer/gpu/
Dapply_momentum_weight_fusion.cc29 VectorRef weight_decay = in DefinePattern() local
41 auto weight_decay = utils::cast<AnfNodePtr>((*equiv)[weight_decay_]); in Process() local
Dapply_momentum_weight_scale_fusion.cc101 auto weight_decay = utils::cast<AnfNodePtr>((*equiv)[weight_decay_]); in Process() local
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
Dsgd_impl.cu21 __global__ void SGDKernel(const int size, const T dampening, const T weight_decay, const bool neste… in SGDKernel()
49 void SGD(const int size, const T dampening, const T weight_decay, const bool nesterov, const T *lr,… in SGD()
Dadam_weight_decay_impl.cu23 … const float *epsilon, const float *lr, const float *weight_decay, T *m, T *v, in AdamWeightDecayKernel()
41const float *weight_decay, T *m, T *v, T *param, T *gradient, cudaStream_t stream) { in AdamWeightDecay()
Dmomentum_impl.cu74 __global__ void FusedMomentumWeightDecayScaleKernel(const size_t element_num, T *weight_decay, T *s… in FusedMomentumWeightDecayScaleKernel()
85 void FusedWeightDecayScaleMomentum(const size_t element_num, T *weight_decay, T *scale, T *variable… in FusedWeightDecayScaleMomentum()
113 __global__ void FusedWeightDecayMomentumKernel(const size_t element_num, T *weight_decay, T *variab… in FusedWeightDecayMomentumKernel()
123 void FusedWeightDecayMomentum(const size_t element_num, T *weight_decay, T *variable, T *accumulati… in FusedWeightDecayMomentum()
157T **weight_decay, T **scale, T **variable, T **accumulation, in CombineFusedMomentumWeightDecayScaleKernel()
170T **weight_decay, T **scale, T **variable, T **accumulation, in CombineFusedWeightDecayScaleMomentum()
/third_party/mindspore/mindspore/nn/optim/
Dlamb.py39 def _update_run_op(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, decay_f… argument
121 def _update_run_op_ascend(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, … argument
295 def __init__(self, params, learning_rate, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument
Doptimizer.py129 def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0): argument
355 def _preprocess_weight_decay(self, weight_decay): argument
468 def _init_group_params(self, parameters, learning_rate, weight_decay, grad_centralization): argument
663 def _tensor_apply_decay_with_sparse(weight_decay, if_apply, weight, gradient): argument
674 def _tensor_apply_decay(weight_decay, if_apply, weight, gradient): argument
Dadam.py37 def _update_run_op(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flag, optim_fi… argument
317 use_nesterov=False, weight_decay=0.0, loss_scale=1.0): argument
483 …def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument
630 use_nesterov=False, weight_decay=0.0, loss_scale=1.0): argument
Dlazyadam.py95 def _check_param_value(beta1, beta2, eps, weight_decay, prim_name): argument
237 use_nesterov=False, weight_decay=0.0, loss_scale=1.0): argument
Dadafactor.py69 weight_decay, scale_lr, scale_parameter, relative_step, argument
295 weight_decay=0.0, argument
Dmomentum.py151 …def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0, use_nesterov… argument
Dsgd.py139 …def __init__(self, params, learning_rate=0.1, momentum=0.0, dampening=0.0, weight_decay=0.0, neste… argument
Dada_grad.py150 update_slots=True, loss_scale=1.0, weight_decay=0.0): argument
Dlars.py28 def _tensor_run_opt(lars, learning_rate, weight_decay, gradient, weight, decay_flag, lars_flag): argument
Dthor.py49 def _tensor_apply_decay(weight_decay, if_apply, weight, gradient): argument
244 def thor(net, learning_rate, damping, momentum, weight_decay=0.0, loss_scale=1.0, batch_size=32, argument
372 …def __init__(self, net, learning_rate, damping, momentum, weight_decay=0.0, loss_scale=1.0, batch_… argument
663 …def __init__(self, net, learning_rate, damping, momentum, weight_decay=0.0, loss_scale=1.0, batch_… argument
/third_party/mindspore/tests/st/fl/mobile/src/
Dadam.py33 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument
48 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument
282 …def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument
392 …def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument
/third_party/mindspore/tests/st/fl/albert/src/
Dadam.py34 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument
49 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument
282 …def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument
392 …def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument
/third_party/mindspore/tests/st/fl/hybrid_lenet/src/
Dadam.py33 def _update_run_kernel(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flags, opt… argument
48 def _update_run_op(beta1, beta2, eps, lr, overflow, weight_decay, param, m, v, gradient, decay_flag… argument
281 …def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument
391 …def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): argument
/third_party/mindspore/tests/st/networks/models/resnet50/src_thor/
Dthor.py48 def _tensor_apply_decay(weight_decay, if_apply, weight, gradient): argument
147 def THOR(net, learning_rate, damping, momentum, weight_decay=0.0, loss_scale=1.0, batch_size=32, argument
159 …def __init__(self, net, learning_rate, damping, momentum, weight_decay=0.0, loss_scale=1.0, batch_… argument
/third_party/mindspore/tests/st/ops/ascend/test_tbe_ops/
Dtest_apply_adam.py27 … def __init__(self, batch_num, input_channels, output_channels, epoch, lr, weight_decay, epsilon): argument
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/
Dfused_weightdecay_momentum_gpu_kernel.h41 T *weight_decay = GetDeviceAddress<T>(inputs, 0); in Launch() local
Dfused_weightdecay_scale_momentum_gpu_kernel.h41 T *weight_decay = GetDeviceAddress<T>(inputs, 0); in Launch() local
Dfused_adam_weight_decay.h75 float *weight_decay = weight_decay_ ? GetDeviceAddress<float>(inputs, 10) : nullptr; in Launch() local
Dcombine_momentum_gpu_kernel.h52 T *weight_decay = GetDeviceAddress<T>(inputs, i * input_num_); in Launch() local

12