# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import pytest
import numpy as np
import mindspore.nn as nn
from mindspore import context, Tensor
from mindspore.ops import operations as P
from mindspore.common import dtype as mstype
from mindspore.common.parameter import Parameter

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")


class AdamNet(nn.Cell):
    def __init__(self, var, m, v):
        super(AdamNet, self).__init__()
        self.apply_adam = P.Adam()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")
        self.v = Parameter(v, name="v")

    def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
        self.apply_adam(self.var, self.m, self.v, beta1_power,
                        beta2_power, lr, beta1, beta2, epsilon, grad)
        return self.var, self.m, self.v


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adam():
    var = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    m = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    v = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    net = AdamNet(var, m, v)

    beta1_power = Tensor(0.9, mstype.float32)
    beta2_power = Tensor(0.999, mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    beta1 = Tensor(0.9, mstype.float32)
    beta2 = Tensor(0.999, mstype.float32)
    epsilon = Tensor(1e-8, mstype.float32)
    grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32))
    new_var, new_m, new_v = net(
        beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
    assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
        "The results should be different!"
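

# A minimal NumPy sketch of the dense Adam step exercised by test_apply_adam
# above, assuming TF-style ApplyAdam semantics. The helper name
# `_adam_reference_numpy` is introduced here purely for illustration and is
# not called by any test.
def _adam_reference_numpy(var, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
    # Update biased first and second moment estimates.
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad * grad
    # Bias-corrected step size, then the parameter update.
    lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)
    var = var - lr_t * m / (np.sqrt(v) + epsilon)
    return var, m, v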


class ApplyAdaMaxNet(nn.Cell):
    def __init__(self, var, m, v):
        super(ApplyAdaMaxNet, self).__init__()
        self.apply_ada_max = P.ApplyAdaMax()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")
        self.v = Parameter(v, name="v")

    def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad):
        self.apply_ada_max(self.var, self.m, self.v,
                           beta1_power, lr, beta1, beta2, epsilon, grad)
        return self.var, self.m, self.v


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_ada_max():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    m = Tensor(np.random.rand(3, 3).astype(np.float32))
    v = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyAdaMaxNet(var, m, v)

    beta1_power = Tensor(0.9, mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    beta1 = Tensor(0.9, mstype.float32)
    beta2 = Tensor(0.99, mstype.float32)
    epsilon = Tensor(1e-10, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_m, new_v = net(beta1_power, lr, beta1, beta2, epsilon, grad)
    assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
        "The results should be different!"


class ApplyAdadeltaNet(nn.Cell):
    def __init__(self, var, accum, accum_update):
        super(ApplyAdadeltaNet, self).__init__()
        self.apply_adadelta = P.ApplyAdadelta()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.accum_update = Parameter(accum_update, name="accum_update")

    def construct(self, lr, rho, epsilon, grad):
        self.apply_adadelta(self.var, self.accum,
                            self.accum_update, lr, rho, epsilon, grad)
        return self.var, self.accum, self.accum_update


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adadelta():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum_update = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyAdadeltaNet(var, accum, accum_update)

    lr = Tensor(0.001, mstype.float32)
    rho = Tensor(0.0, mstype.float32)
    epsilon = Tensor(1e-6, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_accum, new_accum_update = net(lr, rho, epsilon, grad)
    assert ((new_var != var).any() and (new_accum != accum).any() and (new_accum_update != accum_update).any()), \
        "The results should be different!"
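

# A NumPy sketch of the Adadelta step checked by test_apply_adadelta above,
# assuming the usual Adadelta update rule. `_adadelta_reference_numpy` is an
# illustrative helper only and is not used by the tests.
def _adadelta_reference_numpy(var, accum, accum_update, lr, rho, epsilon, grad):
    # Accumulate squared gradients with decay rho.
    accum = rho * accum + (1 - rho) * grad * grad
    # RMS-scaled update, then accumulate its square as well.
    update = np.sqrt(accum_update + epsilon) / np.sqrt(accum + epsilon) * grad
    accum_update = rho * accum_update + (1 - rho) * update * update
    var = var - lr * update
    return var, accum, accum_update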


class ApplyAdagrad(nn.Cell):
    def __init__(self, var, accum):
        super(ApplyAdagrad, self).__init__()
        self.apply_adagrad = P.ApplyAdagrad()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, lr, grad):
        self.apply_adagrad(self.var, self.accum, lr, grad)
        return self.var, self.accum


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adagrad():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyAdagrad(var, accum)

    lr = Tensor(0.001, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_accum = net(lr, grad)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class ApplyAdagradV2Net(nn.Cell):
    def __init__(self, var, accum):
        super(ApplyAdagradV2Net, self).__init__()
        self.apply_adagrad_v2 = P.ApplyAdagradV2(epsilon=1e-6)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, lr, grad):
        self.apply_adagrad_v2(self.var, self.accum, lr, grad)
        return self.var, self.accum


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adagrad_v2():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyAdagradV2Net(var, accum)

    lr = Tensor(0.001, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_accum = net(lr, grad)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class ApplyAddSignNet(nn.Cell):
    def __init__(self, var, m):
        super(ApplyAddSignNet, self).__init__()
        self.apply_add_sign = P.ApplyAddSign()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")

    def construct(self, lr, alpha, sign_decay, beta, grad):
        self.apply_add_sign(self.var, self.m, lr, alpha,
                            sign_decay, beta, grad)
        return self.var, self.m


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_add_sign():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    m = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyAddSignNet(var, m)

    lr = Tensor(0.001, mstype.float32)
    alpha = Tensor(1.0, mstype.float32)
    sign_decay = Tensor(0.99, mstype.float32)
    beta = Tensor(0.9, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_m = net(lr, alpha, sign_decay, beta, grad)
    assert ((new_var != var).any() and (new_m != m).any()), \
        "The results should be different!"
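

# A NumPy sketch of the AddSign rule that test_apply_add_sign above exercises,
# assuming the standard AddSign semantics (moving average of the gradient plus
# a sign-agreement term). `_add_sign_reference_numpy` is illustrative only.
def _add_sign_reference_numpy(var, m, lr, alpha, sign_decay, beta, grad):
    # Exponential moving average of the gradient.
    m = beta * m + (1 - beta) * grad
    # Scale the gradient by alpha plus the sign-agreement term.
    update = (alpha + sign_decay * np.sign(grad) * np.sign(m)) * grad
    var = var - lr * update
    return var, m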


class ApplyCenteredRMSPropNet(nn.Cell):
    def __init__(self, var):
        super(ApplyCenteredRMSPropNet, self).__init__()
        self.apply_centered_rms_prop = P.ApplyCenteredRMSProp()
        self.var = Parameter(var, name="var")

    def construct(self, mean_grad, mean_square, moment, grad, learning_rate):
        self.apply_centered_rms_prop(self.var, mean_grad, mean_square, moment, grad,
                                     learning_rate, 0.0, 1e-10, 0.05)
        return self.var


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_centered_rms_prop():
    var = Tensor(
        np.arange(-6, 6).astype(np.float32).reshape(2, 3, 2), mstype.float32)
    net = ApplyCenteredRMSPropNet(var)

    mean_grad = Tensor(np.arange(12).astype(
        np.float32).reshape(2, 3, 2), mstype.float32)
    mean_square = Tensor(
        np.arange(-8, 4).astype(np.float32).reshape(2, 3, 2), mstype.float32)
    moment = Tensor(np.arange(12).astype(
        np.float32).reshape(2, 3, 2), mstype.float32)
    grad = Tensor(np.arange(12).astype(
        np.float32).reshape(2, 3, 2), mstype.float32)
    learning_rate = Tensor(0.9, mstype.float32)
    new_var = net(mean_grad, mean_square, moment, grad, learning_rate)
    assert (new_var != var).any(), "The results should be different!"


class ApplyFtrlNet(nn.Cell):
    def __init__(self, var, accum, linear):
        super(ApplyFtrlNet, self).__init__()
        self.apply_ftrl = P.ApplyFtrl()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.linear = Parameter(linear, name="linear")

    def construct(self, grad, lr, l1, l2, lr_power):
        self.apply_ftrl(self.var, self.accum, self.linear,
                        grad, lr, l1, l2, lr_power)
        return self.var, self.accum, self.linear


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_ftrl():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    linear = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyFtrlNet(var, accum, linear)

    grad = Tensor(np.random.randint(-4, 4, (3, 3)), mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    l1 = Tensor(0.0, mstype.float32)
    l2 = Tensor(0.0, mstype.float32)
    lr_power = Tensor(-0.5, mstype.float32)
    new_var, new_accum, new_linear = net(grad, lr, l1, l2, lr_power)
    assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
        "The results should be different!"


class ApplyGradientDescentNet(nn.Cell):
    def __init__(self, var):
        super(ApplyGradientDescentNet, self).__init__()
        self.apply_gradient_descent = P.ApplyGradientDescent()
        self.var = Parameter(var, name="var")

    def construct(self, alpha, delta):
        self.apply_gradient_descent(self.var, alpha, delta)
        return self.var


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_gradient_descent():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyGradientDescentNet(var)

    alpha = Tensor(0.001, mstype.float32)
    delta = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var = net(alpha, delta)
    assert (new_var != var).any(), "The results should be different!"
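

# test_apply_gradient_descent above only checks that var changes; the step it
# relies on is the plain gradient-descent update sketched below in NumPy.
# `_gradient_descent_reference_numpy` is an illustrative helper and is not
# called by any test.
def _gradient_descent_reference_numpy(var, alpha, delta):
    # Move var against the supplied delta, scaled by alpha.
    return var - alpha * delta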


class ApplyMomentumNet(nn.Cell):
    def __init__(self, var, accum):
        super(ApplyMomentumNet, self).__init__()
        self.apply_momentum = P.ApplyMomentum(gradient_scale=1024.0)
        self.var = Parameter(var, name='var')
        self.accum = Parameter(accum, name='accum')

    def construct(self, lr, grad, momentum):
        self.apply_momentum(self.var, self.accum, lr, grad, momentum)
        return self.var, self.accum


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_momentum():
    var = Tensor(np.random.normal(size=(2, 3, 3, 4)).astype(np.float32))
    accum = Tensor(np.random.normal(size=(2, 3, 3, 4)).astype(np.float32))
    net = ApplyMomentumNet(var, accum)

    lr = Tensor(np.random.normal(size=(1,)).astype(np.float32))
    grad = Tensor(np.random.normal(size=(2, 3, 3, 4)).astype(np.float32))
    momentum = Tensor(np.random.normal(size=(1,)).astype(np.float32))
    new_var, new_accum = net(lr, grad, momentum)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class ApplyPowerSignNet(nn.Cell):
    def __init__(self, var, m):
        super(ApplyPowerSignNet, self).__init__()
        self.apply_power_sign = P.ApplyPowerSign()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")

    def construct(self, lr, logbase, sign_decay, beta, grad):
        self.apply_power_sign(self.var, self.m, lr,
                              logbase, sign_decay, beta, grad)
        return self.var, self.m


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_power_sign():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    m = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyPowerSignNet(var, m)

    lr = Tensor(0.001, mstype.float32)
    logbase = Tensor(np.e, mstype.float32)
    sign_decay = Tensor(0.99, mstype.float32)
    beta = Tensor(0.9, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_m = net(lr, logbase, sign_decay, beta, grad)
    assert ((new_var != var).any() and (new_m != m).any()), \
        "The results should be different!"


class ApplyProximalAdagradNet(nn.Cell):
    def __init__(self, var, accum):
        super(ApplyProximalAdagradNet, self).__init__()
        self.apply_proximal_adagrad = P.ApplyProximalAdagrad()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name='accum')

    def construct(self, lr, l1, l2, grad):
        self.apply_proximal_adagrad(self.var, self.accum, lr, l1, l2, grad)
        return self.var, self.accum


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_proximal_adagrad():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyProximalAdagradNet(var, accum)

    lr = Tensor(0.01, mstype.float32)
    l1 = Tensor(0.0, mstype.float32)
    l2 = Tensor(0.0, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_accum = net(lr, l1, l2, grad)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"
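

# A NumPy sketch of the proximal Adagrad step used by
# test_apply_proximal_adagrad above, assuming the usual semantics where the
# l1/l2 shrinkage is applied after the Adagrad step (valid for l1 >= 0).
# `_proximal_adagrad_reference_numpy` is illustrative only.
def _proximal_adagrad_reference_numpy(var, accum, lr, l1, l2, grad):
    # Adagrad accumulator and per-element learning rate.
    accum = accum + grad * grad
    lr_t = lr / np.sqrt(accum)
    # Plain Adagrad step followed by soft-thresholding (l1) and scaling (l2).
    prox = var - lr_t * grad
    var = np.sign(prox) * np.maximum(np.abs(prox) - lr_t * l1, 0) / (1 + lr_t * l2)
    return var, accum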


class ApplyProximalGradientDescentNet(nn.Cell):
    def __init__(self, var):
        super(ApplyProximalGradientDescentNet, self).__init__()
        self.apply_proximal_gradient_descent = P.ApplyProximalGradientDescent()
        self.var = Parameter(var, name="var")

    def construct(self, alpha, l1, l2, delta):
        self.apply_proximal_gradient_descent(self.var, alpha, l1, l2, delta)
        return self.var


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_proximal_gradient_descent():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyProximalGradientDescentNet(var)

    alpha = Tensor(0.001, mstype.float32)
    l1 = Tensor(0.0, mstype.float32)
    l2 = Tensor(0.0, mstype.float32)
    delta = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var = net(alpha, l1, l2, delta)
    assert (new_var != var).any(), "The results should be different!"


class ApplyRMSPropNet(nn.Cell):
    def __init__(self, var):
        super(ApplyRMSPropNet, self).__init__()
        self.apply_rms_prop = P.ApplyRMSProp()
        self.var = Parameter(var, name="var")

    def construct(self, mean_square, moment, learning_rate, grad):
        self.apply_rms_prop(self.var, mean_square, moment,
                            learning_rate, grad, 0.0, 1e-10, 0.001)
        return self.var


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_rms_prop():
    var = Tensor(1., mstype.float32)
    net = ApplyRMSPropNet(var)

    mean_square = Tensor(2., mstype.float32)
    moment = Tensor(1., mstype.float32)
    learning_rate = Tensor(0.9, mstype.float32)
    grad = Tensor(2., mstype.float32)
    new_var = net(mean_square, moment, learning_rate, grad)
    assert (new_var != var).any(), "The results should be different!"


class FusedSparseAdamNet(nn.Cell):
    def __init__(self, var, m, v):
        super(FusedSparseAdamNet, self).__init__()
        self.fused_sparse_adam = P.FusedSparseAdam()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")
        self.v = Parameter(v, name="v")

    def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):
        self.fused_sparse_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
                               epsilon, grad, indices)
        return self.var, self.m, self.v


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_adam():
    var = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    m = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    v = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    net = FusedSparseAdamNet(var, m, v)

    beta1_power = Tensor(0.9, mstype.float32)
    beta2_power = Tensor(0.999, mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    beta1 = Tensor(0.9, mstype.float32)
    beta2 = Tensor(0.999, mstype.float32)
    epsilon = Tensor(1e-8, mstype.float32)
    gradient = Tensor(np.random.rand(2, 1, 2), mstype.float32)
    indices = Tensor([0, 1], mstype.int32)
    new_var, new_m, new_v = net(
        beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
    assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
        "The results should be different!"
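

# The FusedSparse* and SparseApply* tests (test_fused_sparse_adam above and the
# remaining sparse tests below) feed a row-sparse gradient as a (grad, indices)
# pair. The sketch below only illustrates how such a pair maps onto a dense
# gradient of the parameter's shape by scattering rows at the given indices;
# it is not a claim about any particular kernel's internals, and
# `_scatter_rows_to_dense` is not used by the tests.
def _scatter_rows_to_dense(grad_np, indices_np, var_shape):
    dense = np.zeros(var_shape, dtype=grad_np.dtype)
    for row, idx in zip(grad_np, indices_np):
        # Accumulate each sparse row into the dense slot it addresses.
        dense[idx] += row
    return dense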


class FusedSparseFtrlNet(nn.Cell):
    def __init__(self, var, accum, linear):
        super(FusedSparseFtrlNet, self).__init__()
        self.fused_sparse_ftrl = P.FusedSparseFtrl(
            lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.linear = Parameter(linear, name="linear")

    def construct(self, grad, indices):
        self.fused_sparse_ftrl(self.var, self.accum,
                               self.linear, grad, indices)
        return self.var, self.accum, self.linear


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_ftrl():
    var = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
    accum = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
    linear = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
    net = FusedSparseFtrlNet(var, accum, linear)

    grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
    indices = Tensor(np.array([0, 1]).astype(np.int32))
    new_var, new_accum, new_linear = net(grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
        "The results should be different!"


class FusedSparseLazyAdamNet(nn.Cell):
    def __init__(self, var, m, v):
        super(FusedSparseLazyAdamNet, self).__init__()
        self.fused_sparse_lazyadam = P.FusedSparseLazyAdam()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")
        self.v = Parameter(v, name="v")

    def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):
        self.fused_sparse_lazyadam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1,
                                   beta2, epsilon, grad, indices)
        return self.var, self.m, self.v


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_lazyadam():
    var = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    m = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    v = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    net = FusedSparseLazyAdamNet(var, m, v)

    beta1_power = Tensor(0.9, mstype.float32)
    beta2_power = Tensor(0.999, mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    beta1 = Tensor(0.9, mstype.float32)
    beta2 = Tensor(0.999, mstype.float32)
    epsilon = Tensor(1e-8, mstype.float32)
    gradient = Tensor(np.random.rand(2, 1, 2), mstype.float32)
    indices = Tensor([0, 1], mstype.int32)
    new_var, new_m, new_v = net(
        beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
    assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
        "The results should be different!"


class FusedSparseProximalAdagradNet(nn.Cell):
    def __init__(self, var, accum):
        super(FusedSparseProximalAdagradNet, self).__init__()
        self.fused_sparse_proximal_adagrad = P.FusedSparseProximalAdagrad()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, lr, l1, l2, grad, indices):
        self.fused_sparse_proximal_adagrad(
            self.var, self.accum, lr, l1, l2, grad, indices)
        return self.var, self.accum


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_proximal_adagrad():
    var = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
    accum = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
    net = FusedSparseProximalAdagradNet(var, accum)

    lr = Tensor(0.01, mstype.float32)
    l1 = Tensor(0.0, mstype.float32)
    l2 = Tensor(0.0, mstype.float32)
    grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
    indices = Tensor(np.array([0, 1]).astype(np.int32))
    new_var, new_accum = net(lr, l1, l2, grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class SparseApplyAdagradNet(nn.Cell):
    def __init__(self, var, accum):
        super(SparseApplyAdagradNet, self).__init__()
        self.sparse_apply_adagrad = P.SparseApplyAdagrad(lr=0.01)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, grad, indices):
        self.sparse_apply_adagrad(self.var, self.accum, grad, indices)
        return self.var, self.accum


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_adagrad():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = SparseApplyAdagradNet(var, accum)

    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    new_var, _ = net(grad, indices)
    # new_accum is equal to accum.
    assert (new_var != var).any(), "The results should be different!"


class SparseApplyAdagradV2Net(nn.Cell):
    def __init__(self, var, accum):
        super(SparseApplyAdagradV2Net, self).__init__()
        self.sparse_apply_adagrad_v2 = P.SparseApplyAdagradV2(
            lr=0.01, epsilon=0.001)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, grad, indices):
        self.sparse_apply_adagrad_v2(self.var, self.accum, grad, indices)
        return self.var, self.accum


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_adagrad_v2():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = SparseApplyAdagradV2Net(var, accum)

    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    new_var, new_accum = net(grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class SparseApplyFtrlNet(nn.Cell):
    def __init__(self, var, accum, linear):
        super(SparseApplyFtrlNet, self).__init__()
        self.sparse_apply_ftrl = P.SparseApplyFtrl(
            lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.linear = Parameter(linear, name="linear")

    def construct(self, grad, indices):
        self.sparse_apply_ftrl(self.var, self.accum,
                               self.linear, grad, indices)
        return self.var, self.accum, self.linear


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_ftrl():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    linear = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = SparseApplyFtrlNet(var, accum, linear)

    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    new_var, new_accum, new_linear = net(grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
        "The results should be different!"


class SparseApplyFtrlV2Net(nn.Cell):
    def __init__(self, var, accum, linear):
        super(SparseApplyFtrlV2Net, self).__init__()
        self.sparse_apply_ftrl_v2 = P.SparseApplyFtrlV2(
            lr=0.01, l1=0.0, l2=0.0, l2_shrinkage=0.0, lr_power=-0.5)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.linear = Parameter(linear, name="linear")

    def construct(self, grad, indices):
        self.sparse_apply_ftrl_v2(
            self.var, self.accum, self.linear, grad, indices)
        return self.var, self.accum, self.linear


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_ftrl_v2():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    linear = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = SparseApplyFtrlV2Net(var, accum, linear)

    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    new_var, new_accum, new_linear = net(grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
        "The results should be different!"


class SparseApplyProximalAdagradNet(nn.Cell):
    def __init__(self, var, accum):
        super(SparseApplyProximalAdagradNet, self).__init__()
        self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagrad()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, lr, l1, l2, grad, indices):
        self.sparse_apply_proximal_adagrad(
            self.var, self.accum, lr, l1, l2, grad, indices)
        return self.var, self.accum


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_proximal_adagrad():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = SparseApplyProximalAdagradNet(var, accum)

    lr = Tensor(0.01, mstype.float32)
    l1 = Tensor(0.0, mstype.float32)
    l2 = Tensor(0.0, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    new_var, new_accum = net(lr, l1, l2, grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class SGDNet(nn.Cell):
    def __init__(self, var):
        super(SGDNet, self).__init__()
        self.sgd = P.SGD()
        self.var = Parameter(var, name="var")

    def construct(self, gradient, learning_rate, accum, momentum, stat):
        self.sgd(self.var, gradient, learning_rate, accum, momentum, stat)
        return self.var


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sgd():
    var = Tensor(np.array([2, -0.5, 1.7, 4]), mstype.float32)
    net = SGDNet(var)

    gradient = Tensor(np.array([1, -1, 0.5, 2]), mstype.float32)
    learning_rate = Tensor(0.01, mstype.float32)
    accum = Tensor(np.array([0.1, 0.3, -0.2, -0.1]), mstype.float32)
    momentum = Tensor(0.1, mstype.float32)
    stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mstype.float32)
    new_var = net(gradient, learning_rate, accum, momentum, stat)
    assert (new_var != var).any(), "The results should be different!"


class ApplyProximalAdagradConstantNet(nn.Cell):
    def __init__(self, var, accum):
        super().__init__()
        self.depend = P.Depend()
        self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagrad()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.const = Tensor(9999, mstype.float32)

    def construct(self, lr, l1, l2, grad, indices):
        optimizer = self.sparse_apply_proximal_adagrad(
            self.var, self.accum, lr, l1, l2, grad, indices)
        return self.depend(self.const, optimizer)


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_proximal_adagrad_constant():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyProximalAdagradConstantNet(var, accum)
    lr = Tensor(0.01, mstype.float32)
    l1 = Tensor(0.1, mstype.float32)
    l2 = Tensor(0.2, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    net(lr, l1, l2, grad, indices)
    assert (net.parameters_dict()['var'].data != var).any()
    assert (net.parameters_dict()['accum'].data != accum).any()


class MulSGDNet(nn.Cell):
    def __init__(self, var):
        super().__init__()
        self.sgd = P.SGD()
        self.var = Parameter(var, name="var")
        self.mul = P.Mul()

    def construct(self, gradient, learning_rate, accum, momentum, stat):
        out = self.mul(self.var, self.var)
        self.sgd(self.var, gradient, learning_rate, accum, momentum, stat)
        return out


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_mul_sgd():
    var = Tensor(np.array([2, -0.5, 1.7, 4]), mstype.float32)
    net = MulSGDNet(var)
    gradient = Tensor(np.array([1, -1, 0.5, 2]), mstype.float32)
    learning_rate = Tensor(0.01, mstype.float32)
    accum = Tensor(np.array([0.1, 0.3, -0.2, -0.1]), mstype.float32)
    momentum = Tensor(0.1, mstype.float32)
    stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mstype.float32)
    net(gradient, learning_rate, accum, momentum, stat)
    assert (net.parameters_dict()['var'].data != var).any()