# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" test_loss_scale """
import numpy as np
import pytest

import mindspore.nn as nn
from mindspore import context
from mindspore import Tensor, Parameter
from mindspore.common import dtype as mstype
from mindspore.nn.optim import Lamb, Momentum, RMSProp
from mindspore.nn.wrap.cell_wrapper import WithLossCell
from mindspore.nn.wrap.loss_scale import TrainOneStepWithLossScaleCell
from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore.train import Model
from mindspore.train.loss_scale_manager import DynamicLossScaleManager


def setup_module():
    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")


class MindData:
    """Stub for a MindSpore dataset: exposes the iterator and metadata
    interface that Model.train expects, without reading real data."""

    def __init__(self, size=None, batch_size=None, repeat_count=1,
                 np_types=None, output_shapes=None, input_indexes=(), func_name=''):
        self._size = size
        self._batch_size = batch_size
        self._repeat_count = repeat_count
        self._np_types = np_types
        self._output_shapes = output_shapes
        self._input_indexes = input_indexes
        self._func_name = func_name
        self._iter_num = 0

    def get_dataset_size(self):
        return self._size

    def get_repeat_count(self):
        return self._repeat_count

    def get_batch_size(self):
        return self._batch_size

    def output_types(self):
        return self._np_types

    def output_shapes(self):
        return self._output_shapes

    def create_tuple_iterator(self, num_epochs=-1, do_copy=True):
        return self

    @property
    def input_indexes(self):
        return self._input_indexes

    @property
    def func_name(self):
        return self._func_name

    def send(self):
        pass

    def __len__(self):
        return self._size

    def __iter__(self):
        return self

    def __next__(self):
        # Stop after exactly `size` batches, so iteration agrees with __len__.
        if self._iter_num >= self._size:
            raise StopIteration
        self._iter_num += 1
        next_value = []
        for shape, typ in zip(self._output_shapes, self._np_types):
            next_value.append(Tensor(np.ndarray(shape, typ)))
        return tuple(next_value)

    def next(self):
        return self.__next__()

    def reset(self):
        self._iter_num = 0


class MindDataSet(MindData):
    """Two-batch stub dataset yielding all-ones tensors of the given shapes
    and dtypes."""

    def __init__(self, dataset_types, dataset_shapes):
        super(MindDataSet, self).__init__(size=2, batch_size=32,
                                          np_types=dataset_types,
                                          output_shapes=dataset_shapes,
                                          input_indexes=(0, 1), func_name='')

    def __next__(self):
        if self._iter_num >= self._size:
            raise StopIteration
        self._iter_num += 1
        res = []
        for shape, t in zip(self._output_shapes, self._np_types):
            res.append(Tensor(np.ones(shape).astype(t)))
        return tuple(res)

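
# Usage sketch (illustrative only, not exercised by the tests below):
# MindDataSet drives Model.train with `size` steps of synthetic ones,
# e.g. two (16, 16) float32 batches per epoch:
#
#   dataset = MindDataSet((np.float32, np.float32), ((16, 16), (16, 16)))
#   for data, label in dataset:
#       pass  # both are Tensor(np.ones((16, 16), np.float32))
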

class NetFP16(nn.Cell):
    """Linear layer whose matmul/add run in float16; input and output are
    cast to/from float32, mimicking mixed-precision training."""

    def __init__(self, in_features, out_features):
        super(NetFP16, self).__init__()
        self.weight = Parameter(Tensor(np.ones([out_features, in_features]).astype(np.float32)),
                                name="weight")
        self.bias = Parameter(Tensor(np.ones([out_features]).astype(np.float32)), name="bias")
        self.matmul = P.MatMul()
        self.add = P.Add()
        self.cast = P.Cast()

    def construct(self, x):
        output = self.cast(self.add(self.matmul(self.cast(x, mstype.float16),
                                                self.cast(self.weight, mstype.float16)),
                                    self.cast(self.bias, mstype.float16)), mstype.float32)
        return output


def get_axis(x):
    """Return a tuple of every axis index of `x`, for a full reduction."""
    shape_op = P.Shape()
    shape = shape_op(x)
    length = F.tuple_len(shape)
    perm = F.make_range(0, length)
    return perm


class MSELoss(nn.Cell):
    """Mean squared error reduced over all axes of the input."""

    def __init__(self):
        super(MSELoss, self).__init__()
        self.square = P.Square()
        self.reduce_mean = P.ReduceMean()

    def construct(self, data, label):
        diff = data - label
        return self.reduce_mean(self.square(diff), get_axis(diff))


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_loss_scale_fp16_lr_overflow():
    """With scale_sense at float32 max, every step overflows: the overflow
    flag is set, the update is skipped, and the loss does not change."""
    inputs = Tensor(np.ones([16, 16]).astype(np.float32))
    label = Tensor(np.zeros([16, 16]).astype(np.float32))
    lr = Tensor(np.ones([1], np.float32) * 0.1)
    net = NetFP16(16, 16)
    net.set_train()

    loss = MSELoss()
    optimizer = Momentum(net.trainable_params(), learning_rate=lr, momentum=0.9)

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
                                                  scale_sense=Tensor(np.full((1,), np.finfo(np.float32).max),
                                                                     dtype=mstype.float32))
    output_1 = train_network(inputs, label)
    output_2 = train_network(inputs, label)
    # Skipped updates leave the loss unchanged; both steps must report overflow.
    assert output_1[0].asnumpy() == output_2[0].asnumpy()
    assert output_1[1].asnumpy()
    assert output_2[1].asnumpy()


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_loss_scale_fp16_lr_overflow_set_sense_scale():
    """Same as above, but resets the scale via set_sense_scale between steps."""
    inputs = Tensor(np.ones([16, 16]).astype(np.float32))
    label = Tensor(np.zeros([16, 16]).astype(np.float32))
    lr = Tensor(np.ones([1], np.float32) * 0.1)
    net = NetFP16(16, 16)
    net.set_train()

    loss = MSELoss()
    optimizer = Momentum(net.trainable_params(), learning_rate=lr, momentum=0.9)

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
                                                  scale_sense=Tensor(np.full((1,), np.finfo(np.float32).max),
                                                                     dtype=mstype.float32))
    output_1 = train_network(inputs, label)

    train_network.set_sense_scale(Tensor(np.full((1,), np.finfo(np.float32).max), dtype=mstype.float32))
    output_2 = train_network(inputs, label)
    assert output_1[0].asnumpy() == output_2[0].asnumpy()
    assert output_1[1].asnumpy()
    assert output_2[1].asnumpy()

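
# Minimal sketch of the dynamic loss-scale policy exercised by the next test.
# This is an illustration of DynamicLossScaleManager's bookkeeping, not its
# actual implementation: on overflow the scale shrinks by `scale_factor`
# (never below 1); after `scale_window` consecutive clean steps it grows by
# the same factor.
def _dynamic_scale_sketch(overflow_history, init_loss_scale=16, scale_factor=2, scale_window=2):
    scale, clean_steps = init_loss_scale, 0
    for overflow in overflow_history:
        if overflow:
            scale = max(scale / scale_factor, 1)
            clean_steps = 0
        else:
            clean_steps += 1
            if clean_steps % scale_window == 0:
                scale *= scale_factor
    return scale
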

@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_loss_scale_fp16_model_train_overflow():
    """Model.train with DynamicLossScaleManager on the stub dataset; verifies
    training runs end to end while the manager adjusts the loss scale."""
    dataset_types = (np.float32, np.float32)
    dataset_shapes = ((16, 16), (16, 16))
    dataset = MindDataSet(dataset_types, dataset_shapes)

    net = NetFP16(16, 16)
    net.set_train()

    loss = MSELoss()
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    scale_manager = DynamicLossScaleManager(init_loss_scale=16, scale_factor=2, scale_window=2)
    model = Model(net, loss_fn=loss, optimizer=optimizer,
                  metrics=None, loss_scale_manager=scale_manager)
    model.train(2, dataset, dataset_sink_mode=False)


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_loss_scale_fp16_opt_rmsprop_overflow():
    """Overflow path with the RMSProp optimizer."""
    inputs = Tensor(np.ones([16, 16]).astype(np.float32))
    label = Tensor(np.zeros([16, 16]).astype(np.float32))
    net = NetFP16(16, 16)
    net.set_train()

    loss = MSELoss()
    optimizer = RMSProp(net.trainable_params(), learning_rate=0.1)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
                                                  scale_sense=Tensor(np.full((1,), np.finfo(np.float32).max),
                                                                     dtype=mstype.float32))
    output_1 = train_network(inputs, label)
    output_2 = train_network(inputs, label)
    assert output_1[0].asnumpy() == output_2[0].asnumpy()
    assert output_1[1].asnumpy()
    assert output_2[1].asnumpy()


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_loss_scale_fp16_overflow():
    """Overflow path with the Lamb optimizer."""
    inputs = Tensor(np.ones([16, 16]).astype(np.float32))
    label = Tensor(np.zeros([16, 16]).astype(np.float32))
    net = NetFP16(16, 16)
    net.set_train()

    loss = MSELoss()
    optimizer = Lamb(net.trainable_params(), learning_rate=0.01)
    net_with_loss = WithLossCell(net, loss)
    net_with_loss.set_grad()
    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
                                                  scale_sense=Tensor(np.full((1,), np.finfo(np.float32).max),
                                                                     dtype=mstype.float32))
    output_1 = train_network(inputs, label)
    output_2 = train_network(inputs, label)
    assert output_1[0].asnumpy() == output_2[0].asnumpy()
    assert output_1[1].asnumpy()
    assert output_2[1].asnumpy()
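
# For contrast, a hypothetical non-overflow variant (not part of this suite):
# with a modest scale such as 1024, the overflow flag should be False and the
# two losses should differ, because the parameter update is actually applied.
#
#   train_network = TrainOneStepWithLossScaleCell(
#       net_with_loss, optimizer,
#       scale_sense=Tensor(np.full((1,), 1024.0), dtype=mstype.float32))
#   loss, overflow, _ = train_network(inputs, label)
#   assert not overflow.asnumpy()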