# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" test adam """
import numpy as np
import pytest

import mindspore.nn as nn
from mindspore import Tensor, Parameter, context
from mindspore.common.api import _cell_graph_executor
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.nn.optim import Adam, AdamWeightDecay
from mindspore.ops import operations as P


@pytest.fixture(scope="module", autouse=True)
def setup_teardown():
    context.set_context(enable_sparse=True)
    yield
    context.set_context(enable_sparse=False)


class Net(nn.Cell):
    """ Net definition """

    def __init__(self):
        super(Net, self).__init__()
        self.weight = Parameter(Tensor(np.ones([64, 10]).astype(np.float32)), name="weight")
        self.bias = Parameter(Tensor(np.ones([10]).astype(np.float32)), name="bias")
        self.matmul = P.MatMul()
        self.biasAdd = P.BiasAdd()

    def construct(self, x):
        x = self.biasAdd(self.matmul(x, self.weight), self.bias)
        return x


class NetWithoutWeight(nn.Cell):
    def __init__(self):
        super(NetWithoutWeight, self).__init__()
        self.matmul = P.MatMul()

    def construct(self, x):
        x = self.matmul(x, x)
        return x


class NetWithSparseGatherV2(nn.Cell):
    """ NetWithSparseGatherV2 definition """

    def __init__(self):
        super(NetWithSparseGatherV2, self).__init__()
        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1")
        self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
        self.axis = 0
        self.gather = P.SparseGatherV2()

    def construct(self, indices, label):
        # label is accepted (and ignored) so the cell matches the
        # (data, label) calling convention of TrainOneStepCell.
        return self.gather(self.weight1, indices, self.axis) + self.weight2


def test_adamwithoutparam():
    """ AdamWeightDecay must reject a network with no trainable parameters """
    net = NetWithoutWeight()
    net.set_train()
    with pytest.raises(ValueError, match=r"Optimizer got an empty parameters list"):
        AdamWeightDecay(net.trainable_params(), learning_rate=0.1)


def test_adamw_compile():
    """ test_adamw_compile """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    optimizer = AdamWeightDecay(net.trainable_params(), learning_rate=0.1)

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)
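

# The compile-only tests above exercise graph construction, not numerics. As a
# hedged reference for what a single AdamWeightDecay step computes, the sketch
# below implements the decoupled-weight-decay update in plain numpy. It assumes
# the formulation without bias correction (as described in the MindSpore docs
# for AdamWeightDecay); _adamw_step_reference is a local helper for
# illustration, not a MindSpore API.
def _adamw_step_reference(w, g, m, v, lr=0.1, beta1=0.9, beta2=0.999,
                          eps=1e-6, weight_decay=0.0):
    """One AdamWeightDecay update on numpy arrays (illustrative sketch only)."""
    m = beta1 * m + (1 - beta1) * g
    v = beta2 * v + (1 - beta2) * g * g
    update = m / (np.sqrt(v) + eps) + weight_decay * w
    return w - lr * update, m, v


def test_adamw_step_reference_sketch():
    """ sanity-check the numpy reference update on a toy tensor """
    w = np.ones([2, 2], dtype=np.float32)
    g = np.full([2, 2], 0.5, dtype=np.float32)
    m = np.zeros_like(w)
    v = np.zeros_like(w)
    w_new, m, v = _adamw_step_reference(w, g, m, v, weight_decay=0.01)
    # with zero initial moments and a positive gradient, the first step
    # moves every weight downward
    assert np.all(w_new < w)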


def test_adam_compile():
    """ test adam compile """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    optimizer = Adam(net.trainable_params(), learning_rate=0.1, weight_decay=0.9)

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_sparse_adam_compile():
    """ test sparse Adam with the optimizer state updated on host (CPU) """
    indices = Tensor(np.array([0, 1]).astype(np.int32))
    label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
    net = NetWithSparseGatherV2()
    net.set_train()

    optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9)
    optimizer.target = 'CPU'  # run the sparse optimizer kernels on host
    train_network = TrainOneStepCell(net, optimizer)
    _cell_graph_executor.compile(train_network, indices, label)


def test_sparse_adam():
    """ test sparse Adam with the default optimizer target """
    indices = Tensor(np.array([0, 1]).astype(np.int32))
    label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
    net = NetWithSparseGatherV2()
    net.set_train()

    optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9)
    train_network = TrainOneStepCell(net, optimizer)
    _cell_graph_executor.compile(train_network, indices, label)


def test_adam_group1():
    """ test Adam with a dynamic lr list and weight decay set per group """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    poly_decay_lr = nn.polynomial_decay_lr(0.01, 0.0001, total_step=10, step_per_epoch=1, decay_epoch=3, power=1.0)

    group_params = [{'params': [all_params[0]], 'lr': poly_decay_lr, 'weight_decay': 0.9},
                    {'params': [all_params[1]]}]
    optimizer = nn.Adam(group_params, learning_rate=0.1)

    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adam_group2():
    """ test Adam with a LearningRateSchedule default lr and a static group lr """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
    group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
                    {'params': [all_params[1]]}]
    optimizer = nn.Adam(group_params, learning_rate=schedule_lr)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adamweightdecay_group():
    """ test AdamWeightDecay with grouped lr and weight decay """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
    group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
                    {'params': [all_params[1]]}]
    optimizer = nn.AdamWeightDecay(group_params, learning_rate=schedule_lr)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)
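

# A hedged worked example of the schedule used in the group tests above:
# with power=1.0, polynomial_decay_lr interpolates linearly from the base lr
# down to end_learning_rate over decay_epoch epochs, then stays flat. The
# numpy replica below assumes the documented formula
#   lr_i = (lr - end_lr) * (1 - min(epoch_i, decay_epoch) / decay_epoch) ** power + end_lr
# with epoch_i = i // step_per_epoch, and is illustrative only.
def test_polynomial_decay_lr_reference_sketch():
    """ compare nn.polynomial_decay_lr against a numpy replica of its formula """
    lr, end_lr, total_step, step_per_epoch, decay_epoch, power = 0.01, 0.0001, 10, 1, 3, 1.0
    expected = [(lr - end_lr) * (1 - min(i // step_per_epoch, decay_epoch) / decay_epoch) ** power + end_lr
                for i in range(total_step)]
    actual = nn.polynomial_decay_lr(lr, end_lr, total_step=total_step, step_per_epoch=step_per_epoch,
                                    decay_epoch=decay_epoch, power=power)
    assert np.allclose(actual, expected)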


def test_adamoffload_group():
    """ test AdamOffload with grouped lr and weight decay """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
    group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
                    {'params': [all_params[1]]}]
    optimizer = nn.AdamOffload(group_params, learning_rate=schedule_lr)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_AdamWeightDecay_beta1():
    """ beta1 must lie strictly below 1.0 """
    net = Net()
    with pytest.raises(ValueError):
        AdamWeightDecay(net.get_parameters(), beta1=1.0, learning_rate=0.1)


def test_AdamWeightDecay_beta2():
    """ beta2 must lie strictly below 1.0 """
    net = Net()
    with pytest.raises(ValueError):
        AdamWeightDecay(net.get_parameters(), beta2=1.0, learning_rate=0.1)


def test_AdamWeightDecay_e():
    """ eps must be positive """
    net = Net()
    with pytest.raises(ValueError):
        AdamWeightDecay(net.get_parameters(), eps=-0.1, learning_rate=0.1)


def test_adam_mindspore_with_empty_params():
    """ a cell with no trainable parameters yields the empty-list error """
    net = nn.Flatten()
    with pytest.raises(ValueError, match=r"Optimizer got an empty parameters list"):
        AdamWeightDecay(net.get_parameters())
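

# Hedged companion to the beta/eps validation tests above: the optimizer base
# class also rejects a negative learning rate. The expected exception type
# (ValueError) is an assumption mirroring the checks above.
def test_AdamWeightDecay_lr():
    """ learning_rate must be non-negative """
    net = Net()
    with pytest.raises(ValueError):
        AdamWeightDecay(net.get_parameters(), learning_rate=-0.1)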