# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Learning rate schedule."""

import math

from ..common import dtype as mstype
from ..ops import operations as P
from .cell import Cell
from .._checkparam import Validator as validator


class LearningRateSchedule(Cell):
    """Base class for learning rate schedules."""
    def __init__(self):
        super(LearningRateSchedule, self).__init__()

    def construct(self, global_step):
        """
        Defines the computation to get the current learning rate.

        This method must be overridden by all subclasses.

        Note:
            The output must be a Tensor of scalar.

        Inputs:
            - **global_step** (Tensor) - The current step number.

        Outputs:
            Tensor. Learning rate at current step with shape :math:`()`.
        """
        raise NotImplementedError


def _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, cls_name):
    validator.check_positive_int(decay_steps, 'decay_steps', cls_name)
    validator.check_positive_float(learning_rate, 'learning_rate', cls_name)
    validator.check_is_float(learning_rate, 'learning_rate', cls_name)
    validator.check_positive_float(decay_rate, 'decay_rate', cls_name)
    validator.check_is_float(decay_rate, 'decay_rate', cls_name)
    validator.check_value_type('is_stair', is_stair, [bool], cls_name)


class ExponentialDecayLR(LearningRateSchedule):
    r"""
    Calculates learning rate based on exponential decay function.

    For the i-th step, the formula of computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = learning\_rate * decay\_rate^{p}

    Where:

    .. math::
        p = \frac{current\_step}{decay\_steps}

    If `is_stair` is True, the formula is:

    .. math::
        p = floor(\frac{current\_step}{decay\_steps})

    Args:
        learning_rate (float): The initial value of learning rate.
        decay_rate (float): The decay rate.
        decay_steps (int): A value used to calculate decayed learning rate.
        is_stair (bool): If true, learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `learning_rate` or `decay_rate` is not a float.
        TypeError: If `decay_steps` is not an int or `is_stair` is not a bool.
        ValueError: If `decay_steps` is less than 1.
        ValueError: If `learning_rate` or `decay_rate` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> learning_rate = 0.1
        >>> decay_rate = 0.9
        >>> decay_steps = 4
        >>> global_step = Tensor(2, mstype.int32)
        >>> exponential_decay_lr = nn.ExponentialDecayLR(learning_rate, decay_rate, decay_steps)
        >>> result = exponential_decay_lr(global_step)
        >>> print(result)
        0.09486833
    """
    def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):
        super(ExponentialDecayLR, self).__init__()
        _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.is_stair = is_stair
        self.pow = P.Pow()
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(global_step, mstype.float32) / self.decay_steps
        if self.is_stair:
            p = P.Floor()(p)
        return self.learning_rate * self.pow(self.decay_rate, p)


class NaturalExpDecayLR(LearningRateSchedule):
    r"""
    Calculates learning rate based on natural exponential decay function.

    For the i-th step, the formula of computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = learning\_rate * e^{-decay\_rate * p}

    Where:

    .. math::
        p = \frac{current\_step}{decay\_steps}

    If `is_stair` is True, the formula is:

    .. math::
        p = floor(\frac{current\_step}{decay\_steps})

    Args:
        learning_rate (float): The initial value of learning rate.
        decay_rate (float): The decay rate.
        decay_steps (int): A value used to calculate decayed learning rate.
        is_stair (bool): If true, learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `learning_rate` or `decay_rate` is not a float.
        TypeError: If `decay_steps` is not an int or `is_stair` is not a bool.
        ValueError: If `decay_steps` is less than 1.
        ValueError: If `learning_rate` or `decay_rate` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> learning_rate = 0.1
        >>> decay_rate = 0.9
        >>> decay_steps = 4
        >>> global_step = Tensor(2, mstype.int32)
        >>> natural_exp_decay_lr = nn.NaturalExpDecayLR(learning_rate, decay_rate, decay_steps, True)
        >>> result = natural_exp_decay_lr(global_step)
        >>> print(result)
        0.1
    """
    def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):
        super(NaturalExpDecayLR, self).__init__()
        _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.is_stair = is_stair
        self.math_e = math.e
        self.pow = P.Pow()
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(global_step, mstype.float32)
        if self.is_stair:
            p = P.FloorDiv()(p, self.decay_steps) * self.decay_steps
        return self.learning_rate * self.pow(self.math_e, -self.decay_rate * p)


class InverseDecayLR(LearningRateSchedule):
    r"""
    Calculates learning rate based on inverse-time decay function.

    For the i-th step, the formula of computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = learning\_rate / (1 + decay\_rate * p)

    Where:

    .. math::
        p = \frac{current\_step}{decay\_steps}

    If `is_stair` is True, the formula is:

    .. math::
        p = floor(\frac{current\_step}{decay\_steps})

    Args:
        learning_rate (float): The initial value of learning rate.
        decay_rate (float): The decay rate.
        decay_steps (int): A value used to calculate decayed learning rate.
        is_stair (bool): If true, learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `learning_rate` or `decay_rate` is not a float.
        TypeError: If `decay_steps` is not an int or `is_stair` is not a bool.
        ValueError: If `decay_steps` is less than 1.
        ValueError: If `learning_rate` or `decay_rate` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> learning_rate = 0.1
        >>> decay_rate = 0.9
        >>> decay_steps = 4
        >>> global_step = Tensor(2, mstype.int32)
        >>> inverse_decay_lr = nn.InverseDecayLR(learning_rate, decay_rate, decay_steps, True)
        >>> result = inverse_decay_lr(global_step)
        >>> print(result)
        0.1
    """
    def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):
        super(InverseDecayLR, self).__init__()
        _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.is_stair = is_stair
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(global_step, mstype.float32) / self.decay_steps
        if self.is_stair:
            p = P.Floor()(p)
        return self.learning_rate / (1 + self.decay_rate * p)


class CosineDecayLR(LearningRateSchedule):
    r"""
    Calculates learning rate based on cosine decay function.

    For the i-th step, the formula of computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = min\_learning\_rate + 0.5 * (max\_learning\_rate - min\_learning\_rate) *
        (1 + cos(\frac{current\_step}{decay\_steps}\pi))

    Args:
        min_lr (float): The minimum value of learning rate.
        max_lr (float): The maximum value of learning rate.
        decay_steps (int): A value used to calculate decayed learning rate.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `min_lr` or `max_lr` is not a float.
        TypeError: If `decay_steps` is not an int.
        ValueError: If `min_lr` is less than 0 or `decay_steps` is less than 1.
        ValueError: If `max_lr` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> min_lr = 0.01
        >>> max_lr = 0.1
        >>> decay_steps = 4
        >>> global_steps = Tensor(2, mstype.int32)
        >>> cosine_decay_lr = nn.CosineDecayLR(min_lr, max_lr, decay_steps)
        >>> result = cosine_decay_lr(global_steps)
        >>> print(result)
        0.055
    """
    def __init__(self, min_lr, max_lr, decay_steps):
        super(CosineDecayLR, self).__init__()
        if not isinstance(min_lr, float):
            raise TypeError("The min_lr must be float.")
        validator.check_non_negative_float(min_lr, "min_lr", self.cls_name)
        validator.check_positive_float(max_lr, 'max_lr', self.cls_name)
        validator.check_is_float(max_lr, 'max_lr', self.cls_name)
        validator.check_positive_int(decay_steps, "decay_steps", self.cls_name)
        if min_lr >= max_lr:
            raise ValueError('The `max_lr` should be greater than the `min_lr`.')
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.decay_steps = decay_steps
        self.math_pi = math.pi
        self.delta = 0.5 * (max_lr - min_lr)
        self.cos = P.Cos()
        self.min = P.Minimum()
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(self.min(global_step, self.decay_steps), mstype.float32)
        return self.min_lr + self.delta * (1.0 + self.cos(self.math_pi * p / self.decay_steps))


class PolynomialDecayLR(LearningRateSchedule):
    r"""
    Calculates learning rate based on polynomial decay function.

    For the i-th step, the formula of computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = (learning\_rate - end\_learning\_rate) *
        (1 - tmp\_step / tmp\_decay\_steps)^{power} + end\_learning\_rate

    Where:

    .. math::
        tmp\_step = min(current\_step, decay\_steps)

    If `update_decay_steps` is true, the value of `tmp_decay_steps` is updated every `decay_steps` steps. The
    formula is:

    .. math::
        tmp\_decay\_steps = decay\_steps * ceil(current\_step / decay\_steps)

    Args:
        learning_rate (float): The initial value of learning rate.
        end_learning_rate (float): The end value of learning rate.
        decay_steps (int): A value used to calculate decayed learning rate.
        power (float): A value used to calculate decayed learning rate. This parameter must be greater than 0.
        update_decay_steps (bool): If true, learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `learning_rate`, `end_learning_rate` or `power` is not a float.
        TypeError: If `decay_steps` is not an int or `update_decay_steps` is not a bool.
        ValueError: If `end_learning_rate` is less than 0 or `decay_steps` is less than 1.
        ValueError: If `learning_rate` or `power` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> learning_rate = 0.1
        >>> end_learning_rate = 0.01
        >>> decay_steps = 4
        >>> power = 0.5
        >>> global_step = Tensor(2, mstype.int32)
        >>> polynomial_decay_lr = nn.PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps, power)
        >>> result = polynomial_decay_lr(global_step)
        >>> print(result)
        0.07363961
    """
    def __init__(self, learning_rate, end_learning_rate, decay_steps, power, update_decay_steps=False):
        super(PolynomialDecayLR, self).__init__()
        validator.check_positive_float(learning_rate, 'learning_rate', self.cls_name)
        validator.check_is_float(learning_rate, 'learning_rate', self.cls_name)
        if not isinstance(end_learning_rate, float):
            raise TypeError("end_learning_rate must be float.")
        validator.check_non_negative_float(end_learning_rate, "end_learning_rate", self.cls_name)
        validator.check_positive_int(decay_steps, 'decay_steps', self.cls_name)
        validator.check_value_type('update_decay_steps', update_decay_steps, [bool], self.cls_name)
        validator.check_positive_float(power, 'power', self.cls_name)
        validator.check_is_float(power, 'power', self.cls_name)

        self.decay_steps = decay_steps
        self.start_learning_rate = learning_rate
        self.end_learning_rate = end_learning_rate
        self.diff_learning_rate = learning_rate - end_learning_rate
        self.power = power
        self.update_decay_steps = update_decay_steps
        self.pow = P.Pow()
        self.ceil = P.Ceil()
        self.min = P.Minimum()
        self.max = P.Maximum()

    def construct(self, global_step):
        tmp_global_step = P.Cast()(global_step, mstype.float32)
        tmp_decay_step = self.decay_steps
        if self.update_decay_steps:
            tmp_decay_step = tmp_decay_step * self.max(self.ceil(tmp_global_step / tmp_decay_step), 1)
        else:
            tmp_global_step = self.min(tmp_global_step, tmp_decay_step)
        p = tmp_global_step / tmp_decay_step
        lr = self.diff_learning_rate * self.pow(1.0 - p, self.power) + self.end_learning_rate
        return lr


class WarmUpLR(LearningRateSchedule):
    r"""
    Gets the warm-up learning rate.

    For the i-th step, the formula of computing warmup_learning_rate[i] is:

    .. math::
        warmup\_learning\_rate[i] = learning\_rate * tmp\_step / warmup\_steps

    Where:

    .. math::
        tmp\_step = min(current\_step, warmup\_steps)

    Args:
        learning_rate (float): The initial value of learning rate.
        warmup_steps (int): The warm up steps of learning rate.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `learning_rate` is not a float.
        TypeError: If `warmup_steps` is not an int.
        ValueError: If `warmup_steps` is less than 1.
        ValueError: If `learning_rate` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> learning_rate = 0.1
        >>> warmup_steps = 2
        >>> global_step = Tensor(2, mstype.int32)
        >>> warmup_lr = nn.WarmUpLR(learning_rate, warmup_steps)
        >>> result = warmup_lr(global_step)
        >>> print(result)
        0.1
    """
    def __init__(self, learning_rate, warmup_steps):
        super(WarmUpLR, self).__init__()
        if not isinstance(learning_rate, float):
            raise TypeError("The learning_rate must be float.")
        validator.check_non_negative_float(learning_rate, "learning_rate", self.cls_name)
        validator.check_positive_int(warmup_steps, 'warmup_steps', self.cls_name)
        self.warmup_steps = warmup_steps
        self.learning_rate = learning_rate
        self.min = P.Minimum()
        self.cast = P.Cast()

    def construct(self, global_step):
        warmup_percent = self.cast(self.min(global_step, self.warmup_steps), mstype.float32) / self.warmup_steps
        return self.learning_rate * warmup_percent


__all__ = [
    'ExponentialDecayLR',
    'NaturalExpDecayLR',
    'InverseDecayLR',
    'CosineDecayLR',
    'PolynomialDecayLR',
    'WarmUpLR'
]
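

# Usage sketch (not part of the module API): a LearningRateSchedule cell can be
# evaluated directly with a step Tensor, as in the docstring examples above, or
# passed as the `learning_rate` argument of a MindSpore optimizer, which then
# queries it with the current global step. The `net` referenced in the commented
# line below is an illustrative assumption, not something defined in this file.
if __name__ == "__main__":
    from mindspore import Tensor, nn
    from mindspore import dtype as mstype

    # Evaluate a staircase exponential-decay schedule at step 6:
    # 0.1 * 0.9 ** floor(6 / 4).
    schedule = nn.ExponentialDecayLR(0.1, 0.9, decay_steps=4, is_stair=True)
    print(schedule(Tensor(6, mstype.int32)))

    # Hand the schedule to an optimizer as a dynamic learning rate
    # (assumes a network `net` with trainable parameters exists).
    # optimizer = nn.Momentum(net.trainable_params(), learning_rate=schedule, momentum=0.9)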