# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
15""" test adafactor """
import numpy as np
import pytest

import mindspore.nn as nn
from mindspore import Tensor, Parameter, context
from mindspore.common.api import _cell_graph_executor
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.nn.optim.adafactor import AdaFactor
from mindspore.ops import operations as P


@pytest.fixture(scope="module", autouse=True)
def setup_teardown():
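    # NetWithSparseGatherV2 relies on sparse-tensor support; enable it for the
    # whole module and restore the default afterwards.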
    context.set_context(enable_sparse=True)
    yield
    context.set_context(enable_sparse=False)


class Net(nn.Cell):
    """Single dense layer: returns x @ weight + bias."""

    def __init__(self):
        super(Net, self).__init__()
        self.weight = Parameter(Tensor(np.ones([64, 10]).astype(np.float32)), name="weight")
        self.bias = Parameter(Tensor(np.ones([10]).astype(np.float32)), name="bias")
        self.matmul = P.MatMul()
        self.bias_add = P.BiasAdd()

    def construct(self, x):
        x = self.bias_add(self.matmul(x, self.weight), self.bias)
        return x


class NetWithoutWeight(nn.Cell):
    """Parameter-free network: multiplies the input by itself."""

    def __init__(self):
        super(NetWithoutWeight, self).__init__()
        self.matmul = P.MatMul()

    def construct(self, x):
        x = self.matmul(x, x)
        return x


class NetWithSparseGatherV2(nn.Cell):
    """Network whose weight1 receives sparse gradients through SparseGatherV2."""

    def __init__(self):
        super(NetWithSparseGatherV2, self).__init__()
        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1")
        self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
        self.axis = 0
        self.gather = P.SparseGatherV2()

    def construct(self, indices, label):
        return self.gather(self.weight1, indices, self.axis) + self.weight2


def test_adafactor_compile1():
    """Compile AdaFactor with an explicit learning rate (relative_step=False)."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
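    # relative_step=False disables AdaFactor's internal step-size schedule, so an
    # explicit learning_rate is supplied.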
    optimizer = AdaFactor(net.trainable_params(), learning_rate=0.1, weight_decay=0.9, relative_step=False)

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_compile2():
    """Compile AdaFactor with learning_rate=None, deferring to the relative-step schedule."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
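    # learning_rate=None leaves the step size to AdaFactor's relative-step schedule
    # (relative_step defaults to True).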
    optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9)

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_compile3():
    """Compile AdaFactor with all four factorization flags passed explicitly."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
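    # Spells out scale_parameter/relative_step/warmup_init/compression explicitly
    # rather than relying on the defaults.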
    optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
                          scale_parameter=True, relative_step=True,
                          warmup_init=False, compression=False)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_compile4():
    """Compile AdaFactor with parameter scaling disabled."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    scale_parameter = False
    relative_step = True
    warmup_init = False
    compression = False
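    # Same configuration as compile3 except scale_parameter=False.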
    optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
                          scale_parameter=scale_parameter, relative_step=relative_step,
                          warmup_init=warmup_init, compression=compression)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_compile5():
    """Compile AdaFactor with warm-up initialization and compression enabled."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    scale_parameter = False
    relative_step = True
    warmup_init = True
    compression = True
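    # Exercises warm-up initialization together with optimizer-state compression.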
    optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
                          scale_parameter=scale_parameter, relative_step=relative_step,
                          warmup_init=warmup_init, compression=compression)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_compile6():
    """Compile AdaFactor with every factorization flag enabled."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    scale_parameter = True
    relative_step = True
    warmup_init = True
    compression = True
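    # All four flags on: scaled updates, relative step, warm-up init, compression.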
    optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
                          scale_parameter=scale_parameter, relative_step=relative_step,
                          warmup_init=warmup_init, compression=compression)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group1():
    """Compile AdaFactor on grouped parameters with a polynomial-decay LR list."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

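    # nn.polynomial_decay_lr returns a per-step list of learning rates (a dynamic LR).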
    poly_decay_lr = nn.polynomial_decay_lr(0.01, 0.0001, total_step=10, step_per_epoch=1, decay_epoch=3, power=1.0)

    group_params = [{'params': [all_params[0]]}, {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=poly_decay_lr, relative_step=False)

    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group2():
    """Compile AdaFactor on grouped parameters with a PolynomialDecayLR schedule."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

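    # nn.PolynomialDecayLR is a LearningRateSchedule cell evaluated per step in-graph.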
    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
    group_params = [{'params': [all_params[0]]},
                    {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=schedule_lr, relative_step=False)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group3():
    """Compile AdaFactor on grouped parameters with the default relative-step learning rate."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

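    # No per-group learning rate is set, so every group falls back to the
    # relative-step schedule implied by learning_rate=None.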
    group_params = [{'params': [all_params[0]]}, {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=None)

    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group4():
    """Compile AdaFactor on grouped parameters with learning_rate=None."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    group_params = [{'params': [all_params[0]]},
                    {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=None)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group5():
    """Compile AdaFactor on grouped parameters with beta1=0.1."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    group_params = [{'params': [all_params[0]]},
                    {'params': [all_params[1]]}]
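    # beta1 sets the decay rate of the (optional) first-moment estimate.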
    optimizer = AdaFactor(group_params, learning_rate=None, beta1=0.1)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group6():
    """Compile AdaFactor on grouped parameters with beta1=0.2."""
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    group_params = [{'params': [all_params[0]]},
                    {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=None, beta1=0.2)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)