# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Compile-only tests for the Lamb optimizer."""
import numpy as np

import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.common.api import _cell_graph_executor
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.nn.optim import Lamb
from mindspore.ops import operations as P
import mindspore.common.dtype as mstype
from mindspore.nn.learning_rate_schedule import LearningRateSchedule, PolynomialDecayLR, WarmUpLR


class LambLearningRate(LearningRateSchedule):
    """Schedule that selects between a ``WarmUpLR`` and a ``PolynomialDecayLR`` value.

    Args:
        learning_rate: Forwarded to both ``WarmUpLR`` and ``PolynomialDecayLR``.
        end_learning_rate: Forwarded to ``PolynomialDecayLR``.
        warmup_steps: Forwarded to ``WarmUpLR``; also stored as a float32 tensor
            that ``construct`` compares against the step it receives.
        decay_steps: Forwarded to ``PolynomialDecayLR``.
        power: Forwarded to ``PolynomialDecayLR``.
    """

    def __init__(self, learning_rate, end_learning_rate, warmup_steps, decay_steps, power):
        super().__init__()
        self.warmup_lr = WarmUpLR(learning_rate, warmup_steps)
        self.decay_lr = PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps, power)
        self.warmup_steps = Tensor(np.array([warmup_steps], dtype=np.float32))
        self.greater = P.Greater()
        self.one = Tensor(np.array([1.0], dtype=np.float32))
        self.cast = P.Cast()

    def construct(self, global_step):
        """Return the warm-up value while ``warmup_steps > global_step``, else the decay value."""
        # 1.0 where warmup_steps > global_step, 0.0 otherwise.
        still_warming = self.greater(self.warmup_steps, global_step)
        warmup_mask = self.cast(still_warming, mstype.float32)
        warmup_value = self.warmup_lr(global_step)
        decay_value = self.decay_lr(global_step)
        # Arithmetic blend of the two schedules using the 0/1 mask.
        decay_mask = self.one - warmup_mask
        return decay_mask * decay_value + warmup_mask * warmup_value


class Net(nn.Cell):
    """Cell computing ``BiasAdd(MatMul(x, weight), bias)`` with all-ones parameters.

    ``weight`` has shape (64, 10) and ``bias`` has shape (10,), both float32.
    """

    def __init__(self):
        super().__init__()
        weight_init = np.ones((64, 10), dtype=np.float32)
        bias_init = np.ones((10,), dtype=np.float32)
        self.weight = Parameter(Tensor(weight_init), name="weight")
        self.bias = Parameter(Tensor(bias_init), name="bias")
        self.matmul = P.MatMul()
        self.biasAdd = P.BiasAdd()

    def construct(self, x):
        """Multiply ``x`` by ``weight`` and add ``bias``."""
        product = self.matmul(x, self.weight)
        return self.biasAdd(product, self.bias)


class NetWithoutWeight(nn.Cell):
    """Cell without parameters that computes ``MatMul(x, x)``."""

    def __init__(self):
        super().__init__()
        self.matmul = P.MatMul()

    def construct(self, x):
        """Return the matrix product of ``x`` with itself."""
        return self.matmul(x, x)


def _make_fixture():
    """Build the objects shared by every test in this module.

    Returns:
        A tuple ``(inputs, label, net, loss)``: a (1, 64) float32 all-ones input
        tensor, a (1, 10) float32 all-zeros label tensor, a ``Net`` on which
        ``set_train()`` has been called, and a ``SoftmaxCrossEntropyWithLogits``
        loss cell.
    """
    inputs = Tensor(np.ones((1, 64), dtype=np.float32))
    label = Tensor(np.zeros((1, 10), dtype=np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    return inputs, label, net, loss


def _compile_train_step(net, loss, optimizer, inputs, label):
    """Wrap ``net`` with ``loss`` and ``optimizer`` and compile the one-step training cell."""
    train_network = TrainOneStepCell(WithLossCell(net, loss), optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_lamb_compile_dynamic_lr():
    """Compile a training step whose Lamb learning rate is a ``LambLearningRate`` schedule."""
    inputs, label, net, loss = _make_fixture()
    schedule = LambLearningRate(0.01, 0.0001, 10, 20, 1.0)
    optimizer = Lamb(net.trainable_params(), schedule)
    _compile_train_step(net, loss, optimizer, inputs, label)


def test_lamb_compile():
    """Compile a training step with Lamb given the positional arguments ``0.02, 0.9``."""
    inputs, label, net, loss = _make_fixture()
    optimizer = Lamb(net.trainable_params(), 0.02, 0.9)
    _compile_train_step(net, loss, optimizer, inputs, label)


def test_lamb_group():
    """Compile a training step with Lamb configured through parameter groups.

    The first trainable parameter gets its own schedule and a ``weight_decay`` of
    0.9; the second group carries no overrides.
    """
    inputs, label, net, loss = _make_fixture()
    schedule = LambLearningRate(0.01, 0.0001, 10, 20, 1.0)
    first_param, second_param = net.trainable_params()
    group_params = [
        {'params': [first_param], 'lr': schedule, 'weight_decay': 0.9},
        {'params': [second_param]},
    ]
    optimizer = Lamb(group_params, 0.02)
    _compile_train_step(net, loss, optimizer, inputs, label)