# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" test adafactor """
import numpy as np
import pytest

import mindspore.nn as nn
from mindspore import Tensor, Parameter, context
from mindspore.common.api import _cell_graph_executor
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.nn.optim.adafactor import AdaFactor
from mindspore.ops import operations as P


@pytest.fixture(scope="module", autouse=True)
def setup_teardown():
    context.set_context(enable_sparse=True)
    yield
    context.set_context(enable_sparse=False)


class Net(nn.Cell):
    """ Net definition """

    def __init__(self):
        super(Net, self).__init__()
        self.weight = Parameter(Tensor(np.ones([64, 10]).astype(np.float32)), name="weight")
        self.bias = Parameter(Tensor(np.ones([10]).astype(np.float32)), name="bias")
        self.matmul = P.MatMul()
        self.biasAdd = P.BiasAdd()

    def construct(self, x):
        x = self.biasAdd(self.matmul(x, self.weight), self.bias)
        return x


class NetWithoutWeight(nn.Cell):
    def __init__(self):
        super(NetWithoutWeight, self).__init__()
        self.matmul = P.MatMul()

    def construct(self, x):
        x = self.matmul(x, x)
        return x


class NetWithSparseGatherV2(nn.Cell):
    """ NetWithSparseGatherV2 definition """

    def __init__(self):
        super(NetWithSparseGatherV2, self).__init__()
        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1")
        self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
        self.axis = 0
        self.gather = P.SparseGatherV2()

    def construct(self, indices, label):
        return self.gather(self.weight1, indices, self.axis) + self.weight2


def test_adafactor_compile1():
    """ test adafactor compile with a fixed learning rate and relative_step=False """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    optimizer = AdaFactor(net.trainable_params(), learning_rate=0.1, weight_decay=0.9, relative_step=False)

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_compile2():
    """ test adafactor compile with learning_rate=None (relative step size) """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9)

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_compile3():
    """ test adafactor compile with scale_parameter=True and relative_step=True """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
                          scale_parameter=True, relative_step=True,
                          warmup_init=False, compression=False)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_compile4():
    """ test adafactor compile with scale_parameter=False and relative_step=True """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    scale_parameter = False
    relative_step = True
    warmup_init = False
    compression = False
    optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
                          scale_parameter=scale_parameter, relative_step=relative_step,
                          warmup_init=warmup_init, compression=compression)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_compile5():
    """ test adafactor compile with warmup_init=True and compression=True """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    scale_parameter = False
    relative_step = True
    warmup_init = True
    compression = True
    optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
                          scale_parameter=scale_parameter, relative_step=relative_step,
                          warmup_init=warmup_init, compression=compression)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_compile6():
    """ test adafactor compile with scale_parameter, warmup_init and compression all enabled """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()

    loss = nn.SoftmaxCrossEntropyWithLogits()
    scale_parameter = True
    relative_step = True
    warmup_init = True
    compression = True
    optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
                          scale_parameter=scale_parameter, relative_step=relative_step,
                          warmup_init=warmup_init, compression=compression)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group1():
    """ test adafactor with grouped parameters and a polynomial decay learning rate list """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    poly_decay_lr = nn.polynomial_decay_lr(0.01, 0.0001, total_step=10, step_per_epoch=1, decay_epoch=3, power=1.0)

    group_params = [{'params': [all_params[0]]}, {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=poly_decay_lr, relative_step=False)

    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group2():
    """ test adafactor with grouped parameters and a PolynomialDecayLR schedule """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
    group_params = [{'params': [all_params[0]]},
                    {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=schedule_lr, relative_step=False)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group3():
    """ test adafactor with grouped parameters and learning_rate=None """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    group_params = [{'params': [all_params[0]]}, {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=None)

    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group4():
    """ test adafactor with grouped parameters and learning_rate=None """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    group_params = [{'params': [all_params[0]]},
                    {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=None)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group5():
    """ test adafactor with grouped parameters, learning_rate=None and beta1=0.1 """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    group_params = [{'params': [all_params[0]]},
                    {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=None, beta1=0.1)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)


def test_adafactor_group6():
    """ test adafactor with grouped parameters, learning_rate=None and beta1=0.2 """
    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()
    net_with_loss = WithLossCell(net, loss)
    all_params = net.trainable_params()

    group_params = [{'params': [all_params[0]]},
                    {'params': [all_params[1]]}]
    optimizer = AdaFactor(group_params, learning_rate=None, beta1=0.2)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _cell_graph_executor.compile(train_network, inputs, label)