# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
15""" test adam """
16import numpy as np
17import pytest
18
19import mindspore.nn as nn
20from mindspore import Tensor, Parameter, context
21from mindspore.common.api import _cell_graph_executor
22from mindspore.nn import TrainOneStepCell, WithLossCell
23from mindspore.nn.optim import Adam, AdamWeightDecay
24from mindspore.ops import operations as P
25
26@pytest.fixture(scope="module", autouse=True)
27def setup_teardown():
28    context.set_context(enable_sparse=True)
29    yield
30    context.set_context(enable_sparse=False)
31
class Net(nn.Cell):
    """ Net definition """

    def __init__(self):
        super(Net, self).__init__()
        self.weight = Parameter(Tensor(np.ones([64, 10]).astype(np.float32)), name="weight")
        self.bias = Parameter(Tensor(np.ones([10]).astype(np.float32)), name="bias")
        self.matmul = P.MatMul()
        self.bias_add = P.BiasAdd()

    def construct(self, x):
        x = self.bias_add(self.matmul(x, self.weight), self.bias)
        return x


class NetWithoutWeight(nn.Cell):
    """ NetWithoutWeight definition """
    def __init__(self):
        super(NetWithoutWeight, self).__init__()
        self.matmul = P.MatMul()

    def construct(self, x):
        x = self.matmul(x, x)
        return x


class NetWithSparseGatherV2(nn.Cell):
    """ NetWithSparseGatherV2 definition """
    def __init__(self):
        super(NetWithSparseGatherV2, self).__init__()
        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1")
        self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
        self.axis = 0
        self.gather = P.SparseGatherV2()

    def construct(self, indices, label):
        # `label` is unused by the net itself; it is accepted so the cell's
        # signature matches the (indices, label) pair the tests compile with.
        return self.gather(self.weight1, indices, self.axis) + self.weight2


def test_adamwithoutparam():
    """ AdamWeightDecay should reject a net with no trainable parameters """
    net = NetWithoutWeight()
    net.set_train()
    with pytest.raises(ValueError, match=r"Optimizer got an empty parameters list"):
        AdamWeightDecay(net.trainable_params(), learning_rate=0.1)


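# The compile tests below share one pattern: wrap the forward net and a loss
# in WithLossCell, attach the optimizer via TrainOneStepCell, and ask the
# graph executor to compile the training graph. Compilation alone exercises
# shape/type inference and optimizer construction without running kernels.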
def test_adamw_compile():
    """ Compile a TrainOneStepCell driven by AdamWeightDecay """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    optimizer = AdamWeightDecay(net.trainable_params(), learning_rate=0.1)

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adam_compile():
    """ Compile a TrainOneStepCell driven by Adam with weight decay """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    optimizer = Adam(net.trainable_params(), learning_rate=0.1, weight_decay=0.9)

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


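# The two sparse tests compile NetWithSparseGatherV2, whose SparseGatherV2 op
# yields sparse (row-slice) gradients for weight1 instead of dense tensors.
# Setting `optimizer.target = 'CPU'` asks Adam to run its sparse update on
# host, while the second test keeps the default target. (This describes the
# intent as of the MindSpore version this file targets; the sparse-gradient
# representation itself is an implementation detail.)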
def test_sparse_adam_compile():
    """ Compile sparse Adam with the sparse update targeted at CPU """
    indices = Tensor(np.array([0, 1]).astype(np.int32))
    label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
    net = NetWithSparseGatherV2()
    net.set_train()

    optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9)
    optimizer.target = 'CPU'
    train_network = TrainOneStepCell(net, optimizer)
    _cell_graph_executor.compile(train_network, indices, label)


def test_sparse_adam():
    """ Compile sparse Adam with the default device target """
    indices = Tensor(np.array([0, 1]).astype(np.int32))
    label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
    net = NetWithSparseGatherV2()
    net.set_train()

    optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9)
    train_network = TrainOneStepCell(net, optimizer)
    _cell_graph_executor.compile(train_network, indices, label)


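# The group tests hand the optimizer a list of parameter groups. Each dict
# may override 'lr' and 'weight_decay' for the parameters it names; keys left
# out fall back to the optimizer-level defaults. A minimal sketch of the
# structure (the names here are illustrative only):
#
#     group_params = [
#         {'params': [weight], 'lr': 0.01, 'weight_decay': 0.9},  # overrides
#         {'params': [bias]},                       # inherits the defaults
#     ]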
def test_adam_group1():
    """ Adam with a per-group dynamic learning rate and per-group weight decay """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    poly_decay_lr = nn.polynomial_decay_lr(0.01, 0.0001, total_step=10, step_per_epoch=1, decay_epoch=3, power=1.0)

    group_params = [{'params': [all_params[0]], 'lr': poly_decay_lr, 'weight_decay': 0.9},
                    {'params': [all_params[1]]}]
    optimizer = nn.Adam(group_params, learning_rate=0.1)

    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adam_group2():
    """ Adam with a learning-rate schedule as the default and a fixed per-group lr """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
    group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
                    {'params': [all_params[1]]}]
    optimizer = nn.Adam(group_params, learning_rate=schedule_lr)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adamweightdecay_group():
    """ AdamWeightDecay with grouped parameters and a learning-rate schedule """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
    group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
                    {'params': [all_params[1]]}]
    optimizer = nn.AdamWeightDecay(group_params, learning_rate=schedule_lr)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adamoffload_group():
    """ AdamOffload with grouped parameters and a learning-rate schedule """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
    group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
                    {'params': [all_params[1]]}]
    optimizer = nn.AdamOffload(group_params, learning_rate=schedule_lr)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


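# Constructor validation: the tests below expect ValueError for beta1 or
# beta2 set to 1.0 and for a negative eps, consistent with beta1 and beta2
# being constrained to the open interval (0, 1) and eps to strictly positive
# values. (The exact error messages are not asserted here.)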
def test_AdamWeightDecay_beta1():
    """ beta1 = 1.0 lies outside the valid range and should raise ValueError """
    net = Net()
    with pytest.raises(ValueError):
        AdamWeightDecay(net.get_parameters(), beta1=1.0, learning_rate=0.1)


def test_AdamWeightDecay_beta2():
    """ beta2 = 1.0 lies outside the valid range and should raise ValueError """
    net = Net()
    with pytest.raises(ValueError):
        AdamWeightDecay(net.get_parameters(), beta2=1.0, learning_rate=0.1)


def test_AdamWeightDecay_e():
    """ A negative eps should raise ValueError """
    net = Net()
    with pytest.raises(ValueError):
        AdamWeightDecay(net.get_parameters(), eps=-0.1, learning_rate=0.1)


def test_adam_mindspore_with_empty_params():
    """ AdamWeightDecay should reject an empty parameter list (nn.Flatten has none) """
    net = nn.Flatten()
    with pytest.raises(ValueError, match=r"Optimizer got an empty parameters list"):
        AdamWeightDecay(net.get_parameters())
228