# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import time
import random
import numpy as np
import pytest

import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.vision.c_transforms as vision
import mindspore.nn as nn
import mindspore.ops.functional as F

from mindspore import Tensor
from mindspore import context
from mindspore import ParameterTuple
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops import composite as CP
from mindspore.nn.optim.momentum import Momentum
from mindspore.nn.wrap.cell_wrapper import WithLossCell

random.seed(1)
np.random.seed(1)
ds.config.set_seed(1)


grad_by_list = CP.GradOperation(get_by_list=True)


def weight_variable_0(shape):
    """Zero-filled float32 tensor, used for the moving-mean and beta initializers."""
    zeros = np.zeros(shape).astype(np.float32)
    return Tensor(zeros)


def weight_variable_1(shape):
    """One-filled float32 tensor, used for the moving-variance initializer."""
    ones = np.ones(shape).astype(np.float32)
    return Tensor(ones)


def conv3x3(in_channels, out_channels, stride=1, padding=0):
    """3x3 convolution"""
    return nn.Conv2d(in_channels, out_channels,
                     kernel_size=3, stride=stride, padding=padding, weight_init='XavierUniform',
                     has_bias=False, pad_mode="same")


def conv1x1(in_channels, out_channels, stride=1, padding=0):
    """1x1 convolution"""
    return nn.Conv2d(in_channels, out_channels,
                     kernel_size=1, stride=stride, padding=padding, weight_init='XavierUniform',
                     has_bias=False, pad_mode="same")


def conv7x7(in_channels, out_channels, stride=1, padding=0):
    """7x7 convolution"""
    return nn.Conv2d(in_channels, out_channels,
                     kernel_size=7, stride=stride, padding=padding, weight_init='XavierUniform',
                     has_bias=False, pad_mode="same")


def bn_with_initialize(out_channels):
    shape = (out_channels,)
    mean = weight_variable_0(shape)
    var = weight_variable_1(shape)
    beta = weight_variable_0(shape)
    bn = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.00001, gamma_init='Uniform',
                        beta_init=beta, moving_mean_init=mean, moving_var_init=var)
    return bn


def bn_with_initialize_last(out_channels):
    shape = (out_channels,)
    mean = weight_variable_0(shape)
    var = weight_variable_1(shape)
    beta = weight_variable_0(shape)
    bn = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.00001, gamma_init='Uniform',
                        beta_init=beta, moving_mean_init=mean, moving_var_init=var)
    return bn


def fc_with_initialize(input_channels, out_channels):
    return nn.Dense(input_channels, out_channels, weight_init='XavierUniform', bias_init='Uniform')


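# The two residual blocks below follow the ResNet bottleneck design: a 1x1
# convolution cuts the channel count by `expansion`, a 3x3 convolution
# processes the reduced features, and a final 1x1 convolution restores the
# full width before the skip connection is added back in.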
class ResidualBlock(nn.Cell):
    expansion = 4

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1):
        super(ResidualBlock, self).__init__()

        out_chls = out_channels // self.expansion
        self.conv1 = conv1x1(in_channels, out_chls, stride=stride, padding=0)
        self.bn1 = bn_with_initialize(out_chls)

        self.conv2 = conv3x3(out_chls, out_chls, stride=1, padding=0)
        self.bn2 = bn_with_initialize(out_chls)

        self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0)
        self.bn3 = bn_with_initialize_last(out_channels)

        self.relu = P.ReLU()
        self.add = P.Add()

    def construct(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out = self.add(out, identity)
        out = self.relu(out)

        return out


class ResidualBlockWithDown(nn.Cell):
    expansion = 4

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 down_sample=False):
        super(ResidualBlockWithDown, self).__init__()

        out_chls = out_channels // self.expansion
        self.conv1 = conv1x1(in_channels, out_chls, stride=stride, padding=0)
        self.bn1 = bn_with_initialize(out_chls)

        self.conv2 = conv3x3(out_chls, out_chls, stride=1, padding=0)
        self.bn2 = bn_with_initialize(out_chls)

        self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0)
        self.bn3 = bn_with_initialize_last(out_channels)

        self.relu = P.ReLU()
        self.downSample = down_sample

        self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride, padding=0)
        self.bn_down_sample = bn_with_initialize(out_channels)
        self.add = P.Add()

    def construct(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        identity = self.conv_down_sample(identity)
        identity = self.bn_down_sample(identity)

        out = self.add(out, identity)
        out = self.relu(out)

        return out


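# The four stage builders below unroll ResNet-50's (3, 4, 6, 3) bottleneck
# layout; the first block of each stage is a ResidualBlockWithDown so the
# projection shortcut can match the stage's channel count and stride.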
class MakeLayer0(nn.Cell):

    def __init__(self, block, in_channels, out_channels, stride):
        super(MakeLayer0, self).__init__()
        self.a = ResidualBlockWithDown(in_channels, out_channels, stride=1, down_sample=True)
        self.b = block(out_channels, out_channels, stride=stride)
        self.c = block(out_channels, out_channels, stride=1)

    def construct(self, x):
        x = self.a(x)
        x = self.b(x)
        x = self.c(x)

        return x


class MakeLayer1(nn.Cell):

    def __init__(self, block, in_channels, out_channels, stride):
        super(MakeLayer1, self).__init__()
        self.a = ResidualBlockWithDown(in_channels, out_channels, stride=stride, down_sample=True)
        self.b = block(out_channels, out_channels, stride=1)
        self.c = block(out_channels, out_channels, stride=1)
        self.d = block(out_channels, out_channels, stride=1)

    def construct(self, x):
        x = self.a(x)
        x = self.b(x)
        x = self.c(x)
        x = self.d(x)

        return x


class MakeLayer2(nn.Cell):

    def __init__(self, block, in_channels, out_channels, stride):
        super(MakeLayer2, self).__init__()
        self.a = ResidualBlockWithDown(in_channels, out_channels, stride=stride, down_sample=True)
        self.b = block(out_channels, out_channels, stride=1)
        self.c = block(out_channels, out_channels, stride=1)
        self.d = block(out_channels, out_channels, stride=1)
        self.e = block(out_channels, out_channels, stride=1)
        self.f = block(out_channels, out_channels, stride=1)

    def construct(self, x):
        x = self.a(x)
        x = self.b(x)
        x = self.c(x)
        x = self.d(x)
        x = self.e(x)
        x = self.f(x)

        return x


class MakeLayer3(nn.Cell):

    def __init__(self, block, in_channels, out_channels, stride):
        super(MakeLayer3, self).__init__()
        self.a = ResidualBlockWithDown(in_channels, out_channels, stride=stride, down_sample=True)
        self.b = block(out_channels, out_channels, stride=1)
        self.c = block(out_channels, out_channels, stride=1)

    def construct(self, x):
        x = self.a(x)
        x = self.b(x)
        x = self.c(x)

        return x


class ResNet(nn.Cell):

    def __init__(self, block, num_classes=100, batch_size=32):
        super(ResNet, self).__init__()
        self.batch_size = batch_size
        self.num_classes = num_classes

        self.conv1 = conv7x7(3, 64, stride=2, padding=0)

        self.bn1 = bn_with_initialize(64)
        self.relu = P.ReLU()
        self.maxpool = P.MaxPoolWithArgmax(kernel_size=3, strides=2, pad_mode="SAME")

        self.layer1 = MakeLayer0(block, in_channels=64, out_channels=256, stride=1)
        self.layer2 = MakeLayer1(block, in_channels=256, out_channels=512, stride=2)
        self.layer3 = MakeLayer2(block, in_channels=512, out_channels=1024, stride=2)
        self.layer4 = MakeLayer3(block, in_channels=1024, out_channels=2048, stride=2)

        self.pool = P.ReduceMean(keep_dims=True)
        self.squeeze = P.Squeeze(axis=(2, 3))
        self.fc = fc_with_initialize(512 * block.expansion, num_classes)

    def construct(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)[0]

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.pool(x, (2, 3))
        x = self.squeeze(x)
        x = self.fc(x)
        return x


def resnet50(batch_size, num_classes):
    return ResNet(ResidualBlock, num_classes, batch_size)


def create_dataset(repeat_num=1, training=True, batch_size=32):
    data_home = "/home/workspace/mindspore_dataset"
    data_dir = data_home + "/cifar-10-batches-bin"
    if not training:
        data_dir = data_home + "/cifar-10-verify-bin"
    data_set = ds.Cifar10Dataset(data_dir)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = vision.RandomCrop((32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    random_horizontal_op = vision.RandomHorizontalFlip()
    # interpolation default BILINEAR
    resize_op = vision.Resize((resize_height, resize_width))
    rescale_op = vision.Rescale(rescale, shift)
    normalize_op = vision.Normalize((0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023))
    changeswap_op = vision.HWC2CHW()
    type_cast_op = C.TypeCast(mstype.int32)

    c_trans = []
    if training:
        c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on images
    data_set = data_set.map(operations=type_cast_op, input_columns="label")
    data_set = data_set.map(operations=c_trans, input_columns="image")

    # apply shuffle operations
    data_set = data_set.shuffle(buffer_size=1000)

    # apply batch operations
    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)

    # apply repeat operations
    data_set = data_set.repeat(repeat_num)

    return data_set


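# A minimal sketch of how the pipeline above could be spot-checked (it assumes
# the hard-coded CIFAR-10 path exists); kept commented out so it does not run
# at import time:
#
#     check_set = create_dataset(repeat_num=1, training=False, batch_size=2)
#     row = next(check_set.create_dict_iterator(num_epochs=1))
#     print(row["image"].shape)   # (2, 3, 224, 224) after Resize and HWC2CHW
#     print(row["label"].dtype)   # Int32 after the TypeCast op

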
class CrossEntropyLoss(nn.Cell):
    def __init__(self):
        super(CrossEntropyLoss, self).__init__()
        self.cross_entropy = P.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean()
        self.one_hot = P.OneHot()
        self.one = Tensor(1.0, mstype.float32)
        self.zero = Tensor(0.0, mstype.float32)

    def construct(self, logits, label):
        label = self.one_hot(label, F.shape(logits)[1], self.one, self.zero)
        loss = self.cross_entropy(logits, label)[0]
        loss = self.mean(loss, (-1,))
        return loss


class GradWrap(Cell):
    """ GradWrap definition """

    def __init__(self, network):
        super(GradWrap, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())

    def construct(self, x, label):
        weights = self.weights
        return grad_by_list(self.network, weights)(x, label)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_pynative_resnet50():
    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")

    batch_size = 32
    num_classes = 10
    net = resnet50(batch_size, num_classes)
    criterion = CrossEntropyLoss()
    optimizer = Momentum(learning_rate=0.01, momentum=0.9,
                         params=filter(lambda x: x.requires_grad, net.get_parameters()))

    net_with_criterion = WithLossCell(net, criterion)
    net_with_criterion.set_grad()
    train_network = GradWrap(net_with_criterion)
    train_network.set_train()

    step = 0
    max_step = 21
    exceed_num = 0
    data_set = create_dataset(repeat_num=1, training=True, batch_size=batch_size)
    for element in data_set.create_dict_iterator(num_epochs=1):
        step = step + 1
        if step > max_step:
            break
        start_time = time.time()
        input_data = element["image"]
        input_label = element["label"]
        loss_output = net_with_criterion(input_data, input_label)
        grads = train_network(input_data, input_label)
        optimizer(grads)
        end_time = time.time()
        cost_time = end_time - start_time
        print("======step: ", step, " loss: ", loss_output.asnumpy(), " cost time: ", cost_time)
        # step 1 is excluded from the timing check: it carries one-off warm-up cost
        if step > 1 and cost_time > 0.18:
            exceed_num = exceed_num + 1
    assert exceed_num < 20
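

# A minimal sketch for invoking the test directly without pytest (assumes a
# GPU build of MindSpore and the CIFAR-10 path hard-coded in create_dataset):
if __name__ == "__main__":
    test_pynative_resnet50()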