# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
import pytest

import mindspore.context as context
from mindspore.common.tensor import Tensor
from mindspore.common.parameter import ParameterTuple
from mindspore.nn import BatchNorm2d, BatchNorm1d, SGD
from mindspore.nn import Cell
from mindspore.ops import composite as C


class Batchnorm_Net(Cell):
    """Wraps BatchNorm2d so the tests can control every initializer."""

    def __init__(self, c, weight, bias, moving_mean, moving_var_init, use_batch_statistics=None):
        super(Batchnorm_Net, self).__init__()
        self.bn = BatchNorm2d(c, eps=0.00001, momentum=0.1, beta_init=bias, gamma_init=weight,
                              moving_mean_init=moving_mean, moving_var_init=moving_var_init,
                              use_batch_statistics=use_batch_statistics)

    def construct(self, input_data):
        x = self.bn(input_data)
        return x


class Grad(Cell):
    """Computes the gradient of `network` w.r.t. its input, given a sensitivity tensor."""

    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network

    def construct(self, input_data, sens):
        gout = self.grad(self.network)(input_data, sens)
        return gout


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_train_forward():
    """Training-mode forward of BatchNorm2d in PyNative and graph mode, plus inference smoke runs."""
    x = np.array([[
        [[1, 3, 3, 5], [2, 4, 6, 8], [3, 6, 7, 7], [4, 3, 8, 2]],
        [[5, 7, 6, 3], [3, 5, 6, 7], [9, 4, 2, 5], [7, 5, 8, 1]]]]).astype(np.float32)
    expect_output = np.array([[[[-0.6059, 0.3118, 0.3118, 1.2294],
                                [-0.1471, 0.7706, 1.6882, 2.6059],
                                [0.3118, 1.6882, 2.1471, 2.1471],
                                [0.7706, 0.3118, 2.6059, -0.1471]],

                               [[0.9119, 1.8518, 1.3819, -0.0281],
                                [-0.0281, 0.9119, 1.3819, 1.8518],
                                [2.7918, 0.4419, -0.4981, 0.9119],
                                [1.8518, 0.9119, 2.3218, -0.9680]]]]).astype(np.float32)

    weight = np.ones(2).astype(np.float32)
    bias = np.ones(2).astype(np.float32)
    moving_mean = np.ones(2).astype(np.float32)
    moving_var_init = np.ones(2).astype(np.float32)
    error = np.ones(shape=[1, 2, 4, 4]) * 1.0e-4

    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
    bn_net = Batchnorm_Net(2, Tensor(weight), Tensor(bias),
                           Tensor(moving_mean), Tensor(moving_var_init))
    bn_net.set_train()
    output = bn_net(Tensor(x))
    diff = output.asnumpy() - expect_output
    assert np.all(diff < error)
    assert np.all(-diff < error)

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    bn_net = Batchnorm_Net(2, Tensor(weight), Tensor(bias),
                           Tensor(moving_mean), Tensor(moving_var_init))
    bn_net.set_train()
    output = bn_net(Tensor(x))
    diff = output.asnumpy() - expect_output
    assert np.all(diff < error)
    assert np.all(-diff < error)

    # Inference mode: smoke checks only, no expected values are compared.
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    bn_net = Batchnorm_Net(2, Tensor(weight), Tensor(bias),
                           Tensor(moving_mean), Tensor(moving_var_init))
    bn_net.set_train(False)
    output = bn_net(Tensor(x))

    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
    bn_net = Batchnorm_Net(2, Tensor(weight), Tensor(bias),
                           Tensor(moving_mean), Tensor(moving_var_init))
    bn_net.set_train(False)
    output = bn_net(Tensor(x))


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_train_backward():
    """Input gradient of BatchNorm2d in training mode against precomputed values."""
    x = np.array([[
        [[1, 3, 3, 5], [2, 4, 6, 8], [3, 6, 7, 7], [4, 3, 8, 2]],
        [[5, 7, 6, 3], [3, 5, 6, 7], [9, 4, 2, 5], [7, 5, 8, 1]]]]).astype(np.float32)
    grad = np.array([[
        [[1, 2, 7, 1], [4, 2, 1, 3], [1, 6, 5, 2], [2, 4, 3, 2]],
        [[9, 4, 3, 5], [1, 3, 7, 6], [5, 7, 9, 9], [1, 4, 6, 8]]]]).astype(np.float32)
    expect_output = np.array([[[[-0.69126546, -0.32903028, 1.9651246, -0.88445705],
                                [0.6369296, -0.37732816, -0.93275493, -0.11168876],
                                [-0.7878612, 1.3614, 0.8542711, -0.52222186],
                                [-0.37732816, 0.5886317, -0.11168876, -0.28073236]],

                               [[1.6447213, -0.38968924, -1.0174079, -0.55067265],
                                [-2.4305856, -1.1751484, 0.86250514, 0.5502673],
                                [0.39576983, 0.5470243, 1.1715001, 1.6447213],
                                [-1.7996241, -0.7051701, 0.7080077, 0.5437813]]]]).astype(np.float32)

    weight = Tensor(np.ones(2).astype(np.float32))
    bias = Tensor(np.ones(2).astype(np.float32))
    moving_mean = Tensor(np.ones(2).astype(np.float32))
    moving_var_init = Tensor(np.ones(2).astype(np.float32))
    error = np.ones(shape=[1, 2, 4, 4]) * 1.0e-6

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    bn_net = Batchnorm_Net(2, weight, bias, moving_mean, moving_var_init)
    bn_net.set_train()
    bn_grad = Grad(bn_net)
    output = bn_grad(Tensor(x), Tensor(grad))
    diff = output[0].asnumpy() - expect_output
    assert np.all(diff < error)
    assert np.all(-diff < error)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_train_stats_false_forward():
    """Forward with use_batch_statistics=False: moving statistics are used even in training mode."""
    x = np.array([[
        [[1, 3, 3, 5], [2, 4, 6, 8], [3, 6, 7, 7], [4, 3, 8, 2]],
        [[5, 7, 6, 3], [3, 5, 6, 7], [9, 4, 2, 5], [7, 5, 8, 1]]]]).astype(np.float32)

    expect_output = np.array([[[[3.707105, 5.121315, 5.121315, 6.535525],
                                [4.41421, 5.8284197, 7.24263, 8.656839],
                                [5.121315, 7.24263, 7.9497347, 7.9497347],
                                [5.8284197, 5.121315, 8.656839, 4.41421]],

                               [[6.535525, 7.9497347, 7.24263, 5.121315],
                                [5.121315, 6.535525, 7.24263, 7.9497347],
                                [9.363945, 5.8284197, 4.41421, 6.535525],
                                [7.9497347, 6.535525, 8.656839, 3.707105]]]]).astype(np.float32)

    weight = np.ones(2).astype(np.float32)
    bias = np.ones(2).astype(np.float32) * 3
    moving_mean = np.zeros(2).astype(np.float32)
    moving_var_init = np.ones(2).astype(np.float32) * 2
    error = np.ones(shape=[1, 2, 4, 4]) * 1.0e-4
    use_batch_statistics = False

    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
    bn_net = Batchnorm_Net(2, Tensor(weight), Tensor(bias), Tensor(moving_mean),
                           Tensor(moving_var_init), use_batch_statistics)
    bn_net.set_train()
    output = bn_net(Tensor(x))
    diff = output.asnumpy() - expect_output
    assert np.all(diff < error)
    assert np.all(-diff < error)

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    bn_net = Batchnorm_Net(2, Tensor(weight), Tensor(bias), Tensor(moving_mean),
                           Tensor(moving_var_init), use_batch_statistics)
    bn_net.set_train()
    output = bn_net(Tensor(x))
    diff = output.asnumpy() - expect_output
    assert np.all(diff < error)
    assert np.all(-diff < error)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_infer_backward():
    """Input gradient of BatchNorm2d in inference mode against precomputed values."""
    expect_output = np.array([[[[-0.3224156, -0.3840524], [1.1337637, -1.0998858]],
                               [[-0.1724273, -0.877854], [0.0422135, 0.5828123]],
                               [[-1.1006137, 1.1447179], [0.9015862, 0.5024918]]]]).astype(np.float32)
    np.random.seed(1)
    x_np = np.random.randn(1, 3, 2, 2).astype(np.float32)
    input_grad_np = np.random.randn(1, 3, 2, 2).astype(np.float32)
    ms_input = Tensor(x_np)
    weight = Tensor(np.ones(3).astype(np.float32))
    bias = Tensor(np.zeros(3).astype(np.float32))
    moving_mean = Tensor(np.zeros(3).astype(np.float32))
    moving_var_init = Tensor(np.ones(3).astype(np.float32))
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    ms_net = Batchnorm_Net(3, weight, bias, moving_mean, moving_var_init)
    ms_net.set_train(False)
    ms_grad = Grad(ms_net)
    ms_out_grad_np = ms_grad(ms_input, Tensor(input_grad_np))
    assert np.allclose(ms_out_grad_np[0].asnumpy(), expect_output)


class BatchNorm1d_Net(Cell):
    """Wraps a two-feature BatchNorm1d so the tests can control every initializer."""

    def __init__(self, affine=True, gamma_init='ones', beta_init='zeros', moving_mean_init='zeros',
                 moving_var_init='ones', use_batch_statistics=None):
        super(BatchNorm1d_Net, self).__init__()
        self.bn1 = BatchNorm1d(2, eps=0.00001, momentum=0.1, affine=affine, gamma_init=gamma_init,
                               beta_init=beta_init, moving_mean_init=moving_mean_init,
                               moving_var_init=moving_var_init, use_batch_statistics=use_batch_statistics)

    def construct(self, x):
        x = self.bn1(x)
        return x


class GradByListNet(Cell):
    """Computes gradients w.r.t. both the input and the trainable parameters of `network`."""

    def __init__(self, network):
        super(GradByListNet, self).__init__()
        self.grad = C.GradOperation(get_all=True, sens_param=True, get_by_list=True)
        self.network = network
        self.params = ParameterTuple(network.trainable_params())

    def construct(self, x, dy):
        grad_op = self.grad(self.network, self.params)
        output = grad_op(x, dy)
        return output


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_1d_train():
    """Two SGD steps through BatchNorm1d: checks input gradients, updated parameters, and moving statistics."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    bn_net = BatchNorm1d_Net(use_batch_statistics=None)
    grad_net = GradByListNet(bn_net)
    optimizer = SGD(bn_net.trainable_params(), learning_rate=0.01, momentum=0.9)
    bn_net.set_train(True)

    x1 = np.array([[1.6243454, -0.6117564],
                   [-0.5281718, -1.0729686],
                   [0.86540765, -2.3015387],
                   [1.7448118, -0.7612069],
                   [0.3190391, -0.24937038]]).astype(np.float32)
    dy1 = np.array([[1.4621079, -2.0601406],
                    [-0.3224172, -0.38405436],
                    [1.1337694, -1.0998913],
                    [-0.1724282, -0.8778584],
                    [0.04221375, 0.58281523]]).astype(np.float32)
    x2 = np.array([[-0.19183555, -0.887629],
                   [-0.7471583, 1.6924546],
                   [0.05080776, -0.6369957],
                   [0.19091548, 2.1002553],
                   [0.12015896, 0.6172031]]).astype(np.float32)
    dy2 = np.array([[0.30017033, -0.35224986],
                    [-1.1425182, -0.34934273],
                    [-0.20889424, 0.5866232],
                    [0.8389834, 0.9311021],
                    [0.2855873, 0.8851412]]).astype(np.float32)
    x_train = [x1, x2]
    dy_train = [dy1, dy2]

    dx1 = np.array([[0.8120, -2.0371],
                    [-0.2202, 0.5837],
                    [0.8040, 0.1950],
                    [-1.1823, -0.2786],
                    [-0.2135, 1.5371]]).astype(np.float32)
    gamma1 = np.array([0.9821, 0.9873]).astype(np.float32)
    beta1 = np.array([-0.0214, 0.0384]).astype(np.float32)
    mean1 = np.array([0.7246, -0.8994]).astype(np.float32)
    variance1 = np.array([0.9036, 0.6559]).astype(np.float32)

    dx2 = np.array([[1.1955, -0.4247],
                    [-0.2425, -0.6789],
                    [-1.4563, 0.3237],
                    [0.8752, 0.3351],
                    [-0.3719, 0.4448]]).astype(np.float32)
    gamma2 = np.array([0.9370, 0.9687]).astype(np.float32)
    beta2 = np.array([-0.0415, 0.0559]).astype(np.float32)
    mean2 = np.array([-0.0314, 0.4294]).astype(np.float32)
    variance2 = np.array([0.2213, 1.6822]).astype(np.float32)

    exp_dx = [dx1, dx2]
    exp_gamma = [gamma1, gamma2]
    exp_beta = [beta1, beta2]
    exp_mean = [mean1, mean2]
    exp_variance = [variance1, variance2]

    for data in zip(x_train, dy_train, exp_dx, exp_gamma, exp_beta, exp_mean, exp_variance):
        output = grad_net(Tensor(data[0]), Tensor(data[1]))
        assert np.allclose(output[0][0].asnumpy(), data[2], atol=1.0e-4)
        optimizer(output[1])
        assert np.allclose(bn_net.bn1.gamma.asnumpy(), data[3], atol=1.0e-4)
        assert np.allclose(bn_net.bn1.beta.asnumpy(), data[4], atol=1.0e-4)
        assert np.allclose(bn_net.bn1.moving_mean.asnumpy(), data[5], atol=1.0e-4)
        assert np.allclose(bn_net.bn1.moving_variance.asnumpy(), data[6], atol=1.0e-4)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_1d_eval():
    """BatchNorm1d inference with fixed moving statistics against precomputed outputs."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    gamma_init = Tensor(np.array([0.93700373, 0.96870345]).astype(np.float32))
    beta_init = Tensor(np.array([-0.04145495, 0.05593072]).astype(np.float32))
    mean_init = Tensor(np.array([-0.03142229, 0.4294087]).astype(np.float32))
    variance_init = Tensor(np.array([0.2212921, 1.6822311]).astype(np.float32))
    bn_net = BatchNorm1d_Net(affine=False, gamma_init=gamma_init, beta_init=beta_init,
                             moving_mean_init=mean_init, moving_var_init=variance_init,
                             use_batch_statistics=None)
    bn_net.set_train(False)

    x1 = np.array([[-1.1006192, 1.1447237],
                   [0.9015907, 0.50249434],
                   [0.90085596, -0.68372786],
                   [-0.12289023, -0.93576944],
                   [-0.26788807, 0.53035545]]).astype(np.float32)
    x2 = np.array([[-0.7543979, 1.2528682],
                   [0.5129298, -0.29809284],
                   [0.48851815, -0.07557172],
                   [1.1316293, 1.5198169],
                   [2.1855755, -1.3964963]]).astype(np.float32)
    x_test = [x1, x2]

    y1 = np.array([[-2.1711, 0.5902],
                   [1.8169, 0.1105],
                   [1.8155, -0.7754],
                   [-0.2236, -0.9637],
                   [-0.5125, 0.1313]]).astype(np.float32)
    y2 = np.array([[-1.4815, 0.6710],
                   [1.0428, -0.4874],
                   [0.9942, -0.3212],
                   [2.2751, 0.8703],
                   [4.3744, -1.3078]]).astype(np.float32)
    y_test = [y1, y2]

    for x, y in zip(x_test, y_test):
        y_pred = bn_net(Tensor(x))
        assert np.allclose(y_pred.asnumpy(), y, atol=1.0e-4)
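

# Reference sketch (not part of the MindSpore API): the expected forward values
# above follow the textbook batch-norm formulas. The helpers below are our own
# NumPy approximation, assuming NCHW layout and the same eps=1e-5 as the
# networks under test; they should match the forward expectations to within the
# tests' tolerances. For example, _np_bn2d_train(x, np.ones(2, np.float32),
# np.ones(2, np.float32)) approximates the training-mode expectation in
# test_train_forward.
def _np_bn2d_train(x, gamma, beta, eps=1e-5):
    # Training mode: normalize with per-channel batch statistics computed
    # over the (N, H, W) axes, using the biased (divide-by-N) variance.
    mean = x.mean(axis=(0, 2, 3), keepdims=True)
    var = x.var(axis=(0, 2, 3), keepdims=True)
    x_hat = (x - mean) / np.sqrt(var + eps)
    return gamma.reshape(1, -1, 1, 1) * x_hat + beta.reshape(1, -1, 1, 1)


def _np_bn2d_infer(x, gamma, beta, moving_mean, moving_var, eps=1e-5):
    # Inference mode (and use_batch_statistics=False): normalize with the
    # stored per-channel moving statistics instead of batch statistics.
    x_hat = (x - moving_mean.reshape(1, -1, 1, 1)) / np.sqrt(moving_var.reshape(1, -1, 1, 1) + eps)
    return gamma.reshape(1, -1, 1, 1) * x_hat + beta.reshape(1, -1, 1, 1)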