# Copyright 2020-2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Operators for nn."""
from __future__ import absolute_import
from __future__ import division

import math
from functools import partial
from mindspore import log as logger
from mindspore._checkparam import _check_3d_int_or_tuple
from mindspore import context
from mindspore.ops import signature as sig
from mindspore import _checkparam as validator
from mindspore.common import dtype as mstype
from mindspore.common._decorator import deprecated
from mindspore.ops.primitive import Primitive
from mindspore.ops.primitive import PrimitiveWithInfer
from mindspore.ops.primitive import PrimitiveWithCheck
from mindspore.ops.primitive import prim_attr_register
from ..auto_generate import (CeLU, Flatten, LogSoftmax, ReLU, ReLU6, Dense, Tanh,
                             Elu, Sigmoid, Softmax, SoftplusExt, HSwish, HSigmoid, AvgPool, BiasAdd,
                             NLLLoss, OneHot, GeLU, FastGeLU, PReLU, RmsNorm,
                             GridSampler3D, GridSampler2D, LayerNorm, LayerNormExt, HShrink, AdamWeightDecay, Dropout,
                             ApplyRotaryPosEmb, PagedAttention, PagedAttentionMask, ReshapeAndCache,
                             FlashAttentionScore, Embedding, UpsampleNearest1D, UpsampleNearest2D,
                             UpsampleNearest3D, UpsampleTrilinear3D,
                             UpsampleBilinear2D, UpsampleLinear1D,
                             BinaryCrossEntropy, BCEWithLogitsLoss)
from .manually_defined import BatchNorm


def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=False,
                                 ret_four=False, strict_positive=True):
    """
    Checks whether an argument is a positive int or a tuple with 2 or 4 (when allow_four is True) positive ints.
    """

    def _raise_message():
        raise ValueError(f"For '{prim_name}' attr '{arg_name}' must be a positive int number or a tuple of two "
                         f"{'or four ' if allow_four else ''}positive int numbers, but got {arg_value}")

    def _get_return_value():
        if isinstance(arg_value, int):
            ret = (1, 1, arg_value, arg_value) if ret_four else (arg_value, arg_value)
        elif len(arg_value) == 2:
            ret = (1, 1, arg_value[0], arg_value[1]) if ret_four else arg_value
        elif len(arg_value) == 4:
            if not allow_four:
                _raise_message()
            ret = arg_value if ret_four else (arg_value[2], arg_value[3])
        else:
            _raise_message()
        return ret

    validator.check_value_type(arg_name, arg_value, (int, tuple), prim_name)
    ret_value = _get_return_value()
    for item in ret_value:
        if isinstance(item, int) and not isinstance(item, bool):
            if item > 0:
                continue
            if not strict_positive and item == 0:
                continue
        _raise_message()
    return ret_value


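# Illustrative behaviour of the helper above (a sketch, not exercised by this module): it
# normalizes scalar / pair / quadruple arguments, e.g.
#     _check_positive_int_or_tuple('kernel_size', 3, 'Conv2D', ret_four=True)          -> (1, 1, 3, 3)
#     _check_positive_int_or_tuple('strides', (2, 4), 'Conv2D', ret_four=True)         -> (1, 1, 2, 4)
#     _check_positive_int_or_tuple('strides', (1, 1, 2, 4), 'Conv2D', allow_four=True) -> (2, 4)
# A zero element is only accepted when strict_positive=False; bool elements always raise.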
83 """ 84 85 def _raise_message(): 86 raise ValueError(f"For '{prim_name}' attr '{arg_name}' dims elements must be positive int numbers, " 87 f"but got {arg_value}") 88 89 validator.check_value_type(arg_name, arg_value, (list, tuple), prim_name) 90 for item in arg_value: 91 if isinstance(item, int) and item > 0: 92 continue 93 _raise_message() 94 return arg_value 95 96 97def _update_attr_by_format(arg_value, arg_format): 98 """ 99 If the format is NHWC, should modify the strides or dilation shape. 100 """ 101 ret = arg_value 102 if len(arg_value) == 4 and arg_format == "NHWC": 103 ret = arg_value[1:] + (1,) 104 105 return ret 106 107 108class AdaptiveAvgPool3D(Primitive): 109 r""" 110 AdaptiveAvgPool3D operation. 111 112 .. warning:: 113 This is an experimental API that is subject to change or deletion. 114 115 Refer to :func:`mindspore.ops.adaptive_avg_pool3d` for more details. 116 117 Args: 118 output_size (Union[int, tuple]): Specify the size of output tensor. It 119 can be a single int or a tuple of three ints. 120 121 Inputs: 122 - **x** (Tensor) - The input of AdaptiveAvgPool3D, which is a 5D or 4D tensor. 123 124 Outputs: 125 Tensor, with the same type as the `x`. 126 127 Supported Platforms: 128 ``Ascend`` ``GPU`` ``CPU`` 129 130 Examples: 131 >>> import mindspore 132 >>> import numpy as np 133 >>> from mindspore import nn, Tensor 134 >>> from mindspore.ops import AdaptiveAvgPool3D 135 >>> class AdaptiveAvgPool3DNet(nn.Cell): 136 ... def __init__(self, output_size): 137 ... super(AdaptiveAvgPool3DNet, self).__init__() 138 ... self.output_size_ = output_size 139 ... self.adaptive_avg_pool_3d = AdaptiveAvgPool3D(self.output_size_) 140 ... def construct(self, x_): 141 ... return self.adaptive_avg_pool_3d(x_) 142 ... 143 >>> output_size=(1,1,1) 144 >>> input_x_val = np.zeros((1,1,2,2,2)) 145 >>> input_x_val[:,:,0,:,:] += 1 146 >>> input_x = Tensor(input_x_val, mindspore.float32) 147 >>> adaptive_avg_pool_3d = AdaptiveAvgPool3DNet(output_size) 148 >>> output = adaptive_avg_pool_3d(input_x) 149 >>> print(output) 150 [[[[[0.5]]]]] 151 """ 152 153 @prim_attr_register 154 def __init__(self, output_size): 155 validator.check_value_type("output_size", output_size, [int, tuple], self.name) 156 self.output_size = (output_size,) * 3 if isinstance(self.output_size, int) else output_size 157 for i, size in enumerate(self.output_size): 158 validator.check_value_type(f"output_size[{i}]", size, [int, type(None)], self.name) 159 if size is not None: 160 validator.check_number(f"output_size[{i}]", size, 0, validator.GE, self.name) 161 162 self.output_size = tuple(-1 if val is None else val for val in self.output_size) 163 164 self.add_prim_attr('output_size', self.output_size) 165 self.init_prim_io_names(inputs=['x'], outputs=['y']) 166 167 168class AdaptiveAvgPool2D(Primitive): 169 r""" 170 AdaptiveAvgPool2D operation. 171 172 Refer to :func:`mindspore.ops.adaptive_avg_pool2d` for more details. 173 174 .. warning:: 175 This is an experimental API that is subject to change or deletion. 176 177 Args: 178 output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`, 179 or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None. 180 If it is None, it means the output size is the same as the input size. 181 182 Inputs: 183 - **input_x** (Tensor) - The input of AdaptiveAvgPool2D, which is a 3D or 4D tensor, 184 with float16 ,float32 or float64 data type. 185 186 Outputs: 187 Tensor, with the same type as the `input_x`. 

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> # case 1: output_size=(None, 2)
        >>> input_x = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]), mindspore.float32)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((None, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[1.5 2.5]
          [4.5 5.5]
          [7.5 8.5]]
         [[1.5 2.5]
          [4.5 5.5]
          [7.5 8.5]]
         [[1.5 2.5]
          [4.5 5.5]
          [7.5 8.5]]]
        >>> # case 2: output_size=2
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D(2)
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[3. 4.]
          [6. 7.]]
         [[3. 4.]
          [6. 7.]]
         [[3. 4.]
          [6. 7.]]]
        >>> # case 3: output_size=(1, 2)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((1, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[4.5 5.5]]
         [[4.5 5.5]]
         [[4.5 5.5]]]
    """

    @prim_attr_register
    def __init__(self, output_size):
        """Initialize AdaptiveAvgPool2D."""
        self.init_prim_io_names(inputs=['x'], outputs=['y'])
        validator.check_value_type("output_size", output_size, [int, tuple], self.name)
        if isinstance(output_size, tuple):
            validator.check_int(len(output_size), 2, validator.EQ, 'length of output_size', self.name)
        self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size
        for i, size in enumerate(self.output_size):
            validator.check_value_type(f"output_size[{i}]", size, [int, type(None)], self.name)
            if size is not None:
                validator.check_number(f"output_size[{i}]", size, 0, validator.GE, self.name)

        self.output_size = tuple(-1 if val is None else val for val in self.output_size)
        self.add_prim_attr('output_size', self.output_size)


class AdaptiveMaxPool2D(Primitive):
    r"""
    Performs 2D adaptive max pooling on a multi-plane input signal.

    Refer to :func:`mindspore.ops.adaptive_max_pool2d` for more details.

    Args:
        output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,
            or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
            If it is None, it means the output size is the same as the input size.

    Inputs:
        - **input_x** (Tensor) - The input of AdaptiveMaxPool2D, which is a 3D or 4D tensor,
          with float16, float32 or float64 data type.

    Outputs:
        Tensor, with the same type as the `input_x`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> # case 1: output_size=(None, 2)
        >>> input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
        >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((None, 2))
        >>> output = adaptive_max_pool_2d(input_x)
        >>> print(output[0])
        [[[[2. 3.]
           [5. 6.]
           [8. 9.]]
          [[2. 3.]
           [5. 6.]
           [8. 9.]]
          [[2. 3.]
           [5. 6.]
           [8. 9.]]]]
        >>> # case 2: output_size=2
        >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D(2)
        >>> output = adaptive_max_pool_2d(input_x)
        >>> print(output[0])
        [[[[5. 6.]
           [8. 9.]]
          [[5. 6.]
           [8. 9.]]
          [[5. 6.]
           [8. 9.]]]]
        >>> # case 3: output_size=(1, 2)
        >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((1, 2))
        >>> output = adaptive_max_pool_2d(input_x)
        >>> print(output[0])
        [[[[8. 9.]]
          [[8. 9.]]
          [[8. 9.]]]]
    """

    @prim_attr_register
    def __init__(self, output_size):
        """Initialize AdaptiveMaxPool2D."""
        validator.check_value_type("output_size", output_size, [int, tuple], self.name)
        if isinstance(output_size, tuple):
            validator.check_int(len(output_size), 2, validator.EQ,
                                'length of output_size', self.name)
        self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size
        self.output_size = (-1 if self.output_size[0] is None else self.output_size[0],
                            -1 if self.output_size[1] is None else self.output_size[1])
        for size in self.output_size:
            validator.check_number("output_size", size, -1, validator.GE, None)
        self.add_prim_attr('output_size', self.output_size)


class AdaptiveMaxPool3D(Primitive):
    r"""
    Performs 3D adaptive max pooling on a multi-plane input signal.

    Refer to :func:`mindspore.ops.adaptive_max_pool3d` for more details.

    Inputs:
        - **x** (Tensor) - Tensor, with shape :math:`(C, D, H, W)` or :math:`(N, C, D, H, W)`.
        - **output_size** (Union[int, tuple]) - The specified output size, which is an integer that represents depth,
          height and width, or a tuple of three int numbers that represent depth, height and width respectively.
          The value must be a positive integer. If it is None, the output size and input size of the corresponding
          dimension are the same.

    Outputs:
        - **y** (Tensor) - Tensor, with the same number of dims and data type as the `input`.
        - **argmax** (Tensor) - Tensor, the indices of the max values, which has the same shape as the
          `y` and its data type is int32.

    Supported Platforms:
        ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, nn, ops
        >>> class AdaptiveMaxPool3DNet(nn.Cell):
        ...     def __init__(self):
        ...         super(AdaptiveMaxPool3DNet, self).__init__()
        ...         self.adaptive_max_pool_3d = ops.AdaptiveMaxPool3D()
        ...     def construct(self, x_, output_size_):
        ...         return self.adaptive_max_pool_3d(x_, output_size_)
        >>> x = np.arange(0,36).reshape((1, 3, 3, 4)).astype(np.float32)
        >>> output_size = np.array([1, 1, 2], dtype=np.int32)
        >>> net = AdaptiveMaxPool3DNet()
        >>> output = net(Tensor(x), Tensor(output_size))
        >>> print(output[0].asnumpy())
        [[[[33. 35.]]]]
        >>> print(output[1].asnumpy())
        [[[[33 35]]]]
    """

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['x', 'output_size'], outputs=['y', 'argmax'])


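# A small worked check of the AdaptiveMaxPool3D example above (a sketch, not executed here): with
# x = np.arange(36).reshape(1, 3, 3, 4) and output_size = (1, 1, 2), the D and H axes collapse to a
# single region and the W axis splits into [0, 2) and [2, 4); the maxima of those two regions are
# the elements 33 and 35, whose flat indices also appear in `argmax`.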
class Softplus(Primitive):
    r"""
    Softplus activation function.

    Softplus is a smooth approximation to the ReLU function.
    It can be used to constrain the output of a machine to always be positive.
    The function is shown as follows:

    .. math::

        \text{output} = \log(1 + \exp(\text{x}))

    Inputs:
        - **input_x** (Tensor) - Tensor of any dimension.
          Supported dtypes:

          - GPU/CPU: float16, float32, float64.
          - Ascend: float16, float32.

    Outputs:
        Tensor, with the same type and shape as the `input_x`.

    Raises:
        TypeError: If `input_x` is not a Tensor.
        TypeError: If the dtype of `input_x` is not float16, float32 or float64.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
        >>> softplus = ops.Softplus()
        >>> output = softplus(input_x)
        >>> print(output)
        [1.3132615 2.126928 3.0485873 4.01815 5.0067153]
    """

    @prim_attr_register
    def __init__(self):
        """Initialize Softplus"""
        self.init_prim_io_names(inputs=['x'], outputs=['output'])


class Softsign(Primitive):
    r"""
    Softsign activation function.

    Refer to :func:`mindspore.ops.softsign` for more details.

    Inputs:
        - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means any number of
          additional dimensions, with float16 or float32 data type.

    Outputs:
        Tensor, with the same type and shape as the `input_x`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> input_x = Tensor(np.array([0, -1, 2, 30, -30]), mindspore.float32)
        >>> softsign = ops.Softsign()
        >>> output = softsign(input_x)
        >>> print(output)
        [ 0. -0.5 0.6666667 0.9677419 -0.9677419]
    """

    @prim_attr_register
    def __init__(self):
        """Initialize Softsign"""
        self.init_prim_io_names(inputs=['x'], outputs=['output'])


class ReLUV3(Primitive):
    r"""
    Computes ReLUV3 (Rectified Linear Unit activation function) of input tensors element-wise.

    It returns max(x, 0) element-wise. Specially, the neurons with negative outputs
    will be suppressed and the active neurons will stay the same.

    .. math::

        ReLUV3(x) = (x)^+ = max(0, x)

    Inputs:
        - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means any number of
          additional dimensions, data type is
          `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore.html#mindspore.dtype>`_.

    Outputs:
        Tensor of shape :math:`(N, *)`, with the same type and shape as the `input_x`.

    Raises:
        TypeError: If `input_x` is not a Tensor.

    Supported Platforms:
        ``Ascend`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
        >>> relu_v3 = ops.ReLUV3()
        >>> output = relu_v3(input_x)
        >>> print(output)
        [[0. 4. 0.]
         [2. 0. 9.]]
    """

    @prim_attr_register
    def __init__(self):
        """Initialize ReLUV3"""
        self.init_prim_io_names(inputs=['x'], outputs=['output'])


class Mish(PrimitiveWithInfer):
    r"""
    Computes MISH (A Self Regularized Non-Monotonic Neural Activation Function) of input tensors element-wise.

    Refer to :func:`mindspore.ops.mish` for more details.

    Inputs:
        - **x** (Tensor) - The input Tensor.
          Supported dtypes:

          - GPU/CPU: float16, float32, float64.
          - Ascend: float16, float32.

    Outputs:
        Tensor, with the same type and shape as the `x`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
        >>> mish = ops.Mish()
        >>> output = mish(x)
        >>> print(output.shape)
        (2, 3)
        >>> x = Tensor(2.1, mindspore.float32)
        >>> output = mish(x)
        >>> print(output)
        2.050599
    """

    @prim_attr_register
    def __init__(self):
        """Initialize Mish"""
        self.init_prim_io_names(inputs=['x'], outputs=['output'])


class SeLU(Primitive):
    r"""
    Activation function SeLU (Scaled exponential Linear Unit).

    The activation function is defined as:

    .. math::
        E_{i} =
        scale *
        \begin{cases}
        x_{i}, &\text{if } x_{i} \geq 0; \cr
        \text{alpha} * (\exp(x_i) - 1), &\text{otherwise.}
        \end{cases}

    where :math:`alpha` and :math:`scale` are pre-defined constants (:math:`alpha=1.67326324`
    and :math:`scale=1.05070098`).

    See more details in `Self-Normalizing Neural Networks <https://arxiv.org/abs/1706.02515>`_.

    Inputs:
        - **input_x** (Tensor) - Tensor of any dimension.
          The data type is int8, int32, float16, float32, float64 (only CPU, GPU).

    Outputs:
        Tensor, with the same type and shape as the `input_x`.

    Raises:
        TypeError: If dtype of `input_x` is not int8, int32, float16, float32, float64.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
        >>> selu = ops.SeLU()
        >>> output = selu(input_x)
        >>> print(output)
        [[-1.1113307 4.202804 -1.7575096]
         [ 2.101402 -1.7462534 9.456309 ]]
    """

    @prim_attr_register
    def __init__(self):
        """Initialize SeLU"""
        self.init_prim_io_names(inputs=['input_x'], outputs=['output'])


class FusedBatchNorm(Primitive):
    r"""
    The FusedBatchNorm interface is deprecated, please use the BatchNorm interface.
    """

    def __init__(self, mode=0, epsilon=1e-5, momentum=0.1):
        raise TypeError("The FusedBatchNorm interface is deprecated, please use the BatchNorm interface.")


class FusedBatchNormEx(PrimitiveWithCheck):
    r"""
    The FusedBatchNormEx interface is deprecated, please use the BatchNorm interface.
    """

    def __init__(self, mode=0, epsilon=1e-5, momentum=0.1, data_format="NCHW"):
        raise TypeError("The FusedBatchNormEx interface is deprecated, please use the BatchNorm interface.")


class InstanceNorm(PrimitiveWithInfer):
    r"""
    Instance Normalization over a 4D input.

    This operator applies Instance Normalization over a 4D input (a mini-batch of 2D inputs with
    additional channel dimension) as described in the paper `Instance Normalization: The Missing Ingredient for
    Fast Stylization <https://arxiv.org/abs/1607.08022>`_. It rescales and recenters the feature using a mini-batch
    of data and the learned parameters, which can be described by the following formula.

    .. math::

        y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta

    where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon.

    Args:
        epsilon (float): A small value added for numerical stability. Default: ``1e-5`` .
        momentum (float): The hyper parameter to compute moving average for running_mean and running_var
            (e.g. :math:`new\_running\_mean = momentum * running\_mean + (1 - momentum) * current\_mean`).
            Momentum value must be in the range [0, 1]. Default: ``0.1`` .

    Inputs:
        - **input_x** (Tensor) - The input of InstanceNorm, Tensor of shape :math:`(N, C, H, W)`,
          data type: float16 or float32.
        - **gamma** (Parameter) - Scale, Tensor of shape :math:`(C,)`,
          data type: float32.
        - **beta** (Parameter) - Bias, Tensor of shape :math:`(C,)`,
          data type: float32.
        - **mean** (Parameter) - Mean value, Tensor of shape :math:`(C,)`, data type: float32.
        - **variance** (Parameter) - Variance value, Tensor of shape :math:`(C,)`, data type: float32.

    Outputs:
        Tuple of 3 Tensors, the normalized input and the updated parameters.

        - **output_x** (Tensor) - The output of InstanceNorm, same type and shape as the `input_x`.
        - **updated_moving_mean** (Tensor) - Updated mean value, Tensor of shape :math:`(NC,)`, data type: float32.
        - **updated_moving_variance** (Tensor) - Updated variance value, Tensor of shape :math:`(NC,)`,
          data type: float32.

    Supported Platforms:
        ``GPU``

    Raises:
        TypeError: If `epsilon` or `momentum` is not a float.
        TypeError: If dtype of `input_x` is neither float16 nor float32.
        TypeError: If dtype of `gamma`, `beta` or `mean` is not float32.
        ValueError: If `epsilon` is not in the range of [0, 1).
        ValueError: If `momentum` is not in the range of [0, 1].

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, Parameter, nn, ops
        >>> class InstanceNormNet(nn.Cell):
        ...     def __init__(self):
        ...         super(InstanceNormNet, self).__init__()
        ...         self.instance_norm = ops.InstanceNorm()
        ...         self.gamma = Parameter(Tensor(np.ones([64]), mindspore.float32), name="gamma")
        ...         self.beta = Parameter(Tensor(np.ones([64]), mindspore.float32), name="beta")
        ...         self.mean = Parameter(Tensor(np.ones([64]), mindspore.float32), name="mean")
        ...         self.variance = Parameter(Tensor(np.ones([64]), mindspore.float32), name="variance")
        ...     def construct(self, input_x):
        ...         out = self.instance_norm(input_x, self.gamma, self.beta, self.mean, self.variance)
        ...         return out
        ...
        >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32)
        >>> net = InstanceNormNet()
        >>> output = net(input_x)
        >>> result = output[0].shape
        >>> print(result)
        (128, 64, 32, 64)
    """
    __mindspore_signature__ = (
        sig.make_sig('input_x', dtype=sig.sig_dtype.T2),
        sig.make_sig('gamma', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('beta', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('mean', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('variance', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
    )

    @prim_attr_register
    def __init__(self, epsilon=1e-5, momentum=0.1):
        """Initialize InstanceNorm."""
        self.init_prim_io_names(inputs=['x', 'gamma', 'beta', 'mean', 'variance'],
                                outputs=['y', 'save_mean', 'save_variance'])
        self.epsilon = validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', self.name)
        self.momentum = validator.check_float_range(momentum, 0, 1, validator.INC_BOTH, 'momentum', self.name)
        self._update_parameter = True
        self.add_prim_attr('side_effect_mem', True)


class InstanceNormV2(Primitive):
    r"""
    Instance Normalization over a 4D or 5D input.

    This operator applies Instance Normalization over a 4D or 5D input (a mini-batch of 2D inputs with
    additional channel dimension) as described in the paper `Instance Normalization: The Missing Ingredient for
    Fast Stylization <https://arxiv.org/abs/1607.08022>`_. It rescales and recenters the feature using a mini-batch
    of data and the learned parameters, which can be described by the following formula.

    .. math::

        y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta

    where :math:`\gamma` is scale (gamma), :math:`\beta` is bias (beta), :math:`\epsilon` is epsilon.

    Note:
        The format of input `x` supports ``NCHW`` and ``NC1HWC0`` on the ``CPU`` and ``Ascend`` platforms.
        When attr `is_training` is ``False``, this module does not track the running mean and variance.
        The output `batch_mean` and `batch_variance` will be all zeros.

    Args:
        is_training (bool): An optional boolean value. Default: ``True``.
            When set to ``True``, this module tracks the running mean and variance.
            When set to ``False``, this module does not track such statistics and always uses batch
            statistics in both training and eval modes.
        momentum (float): The hyper parameter to compute moving average for running_mean and running_var
            (e.g. :math:`new\_running\_mean = momentum * running\_mean + (1 - momentum) * current\_mean`).
            Momentum value must be in the range [0, 1]. Default: ``0.1`` .
        epsilon (float): A small value added to the denominator for numerical stability.
            Epsilon value must be in the range [0, 1). Default: ``1e-5`` .

    Inputs:
        - **x** (Tensor) - The input of InstanceNormV2, Tensor of shape :math:`(N, C, H, W)`
          or :math:`(N, C1, H, W, C0)`, data type: float16 or float32.
        - **gamma** (Tensor) - Scale, whose shape depends on the shape of input `x`, data type: float32.
          If `x` shape is :math:`(N, C, H, W)`, shape of `gamma` is :math:`(N, C, 1, 1)`.
          If `x` shape is :math:`(N, C1, H, W, C0)`, shape of `gamma` is :math:`(N, C1, 1, 1, C0)`.
        - **beta** (Tensor) - Bias, has the same shape and data type as `gamma`.
        - **mean** (Tensor) - Mean value, has the same shape and data type as `gamma`.
        - **variance** (Tensor) - Variance value, has the same shape and data type as `gamma`.

    Outputs:
        Tuple of 3 Tensors, the normalized input, the mean and variance of batch input.

        - **y** (Tensor) - The output of InstanceNormV2, same type and shape as the `x`.
        - **batch_mean** (Tensor) - The mean value of batch input, same type and shape as the input `mean`.
        - **batch_variance** (Tensor) - The variance value of batch input, same type and shape as the input `variance`.

    Supported Platforms:
        ``Ascend`` ``CPU``

    Raises:
        TypeError: If either item in the inputs is not a Tensor.
        TypeError: If data type of `x` is neither float16 nor float32.
        TypeError: If data type of `gamma` is not a Tensor of float32.
        TypeError: If data type of `beta` is not a Tensor of float32.
        TypeError: If data type of `mean` is not a Tensor of float32.
        TypeError: If data type of `variance` is not a Tensor of float32.
        TypeError: If data type of attr `is_training` is not bool.
        TypeError: If data type of attr `momentum` is not float.
        TypeError: If data type of attr `epsilon` is not float.
        ValueError: If :math:`H * W <= 1` in input `x`.
        ValueError: If the shape of either item in the inputs is neither 4D nor 5D.
        ValueError: If `epsilon` is not in the range of [0, 1).
        ValueError: If `momentum` is not in the range of [0, 1].

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> from mindspore import dtype as mstype
        >>> x = Tensor(input_data=np.random.randn(128, 48, 32, 64, 12), dtype=mindspore.float32)
        >>> gamma = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
        >>> beta = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
        >>> mean = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
        >>> var = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
        >>> instance_norm_v2 = ops.InstanceNormV2()
        >>> output = instance_norm_v2(x, gamma, beta, mean, var)
        >>> y_shape = output[0].shape
        >>> print(y_shape)
        (128, 48, 32, 64, 12)
        >>> batch_mean_shape = output[1].shape
        >>> print(batch_mean_shape)
        (128, 48, 1, 1, 12)
        >>> batch_var_shape = output[2].shape
        >>> print(batch_var_shape)
        (128, 48, 1, 1, 12)
    """
    __mindspore_signature__ = (
        sig.make_sig('x', dtype=sig.sig_dtype.T1),
        sig.make_sig('gamma', dtype=sig.sig_dtype.T),
        sig.make_sig('beta', dtype=sig.sig_dtype.T),
        sig.make_sig('mean', dtype=sig.sig_dtype.T),
        sig.make_sig('variance', dtype=sig.sig_dtype.T),
    )

    @prim_attr_register
    def __init__(self, is_training=True, momentum=0.1, epsilon=1e-5):
        """Initialize InstanceNormV2."""
        self.init_prim_io_names(inputs=['x', 'gamma', 'beta', 'mean', 'variance'],
                                outputs=['y', 'batch_mean', 'batch_variance'])
        validator.check_is_float(epsilon, 'epsilon', self.name)
        validator.check_is_float(momentum, 'momentum', self.name)
        validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', self.name)
        validator.check_float_range(momentum, 0, 1, validator.INC_BOTH, 'momentum', self.name)
        validator.check_bool(is_training, "is_training", self.name)


class Conv2D(Primitive):
    r"""
    2D convolution layer.

    Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
    where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is feature height, :math:`W` is feature
    width.

    The output is calculated based on formula:

    .. math::

        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})

    where :math:`bias` is the output channel bias, :math:`ccor` is
    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
    :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.

    Here are the indices' meanings:

    - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
      where :math:`N` is the batch size of the input.

    - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
      where :math:`C_{out}` is the number of
      output channels, which is also equal to the number of kernels.

    - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
      where :math:`C_{in}` is the number of
      input channels, which is also equal to the number of channels in the convolutional kernels.

    Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th
    output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional
    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
    channel in the :math:`i`-th batch of the input feature map.

    The shape of the convolutional kernel is given by :math:`(\text{kernel_size[0]},\text{kernel_size[1]})`,
    where :math:`\text{kernel_size[0]}`
    and :math:`\text{kernel_size[1]}` are the height and width of the kernel, respectively.
    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.

    For more details about convolution layers, please refer to `Gradient Based Learning Applied to Document
    Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

    Note:
        On the Ascend platform, only group convolution in depthwise convolution scenarios is supported.
        That is, when `group>1`, the condition `in\_channels` = `out\_channels` = `group` must be satisfied.

    Args:
        out_channel (int): Specifies the output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel.
            It can be a single int or a tuple of 2 integers. A single int means the value is for both the height
            and the width. A tuple of 2 ints means the first value is for the height and the other is for the width.
        mode (int, optional): Modes for different convolutions. The value is currently not used. Default: ``1`` .
        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .

            - ``"same"``: Pad the input around its edges so that the shape of input and output
              are the same when `stride` is set to ``1``.
              The amount of padding is calculated by the operator internally. If the amount is even, it is
              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
              If this mode is set, `pad` must be 0.
            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
              possible height and width. Extra pixels that could not complete a full stride will
              be discarded. If this mode is set, `pad` must be 0.
            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
              in the height and width directions is determined by the `pad` parameter.
              If this mode is set, `pad` must be greater than or equal to 0.

        pad (Union(int, tuple[int]), optional): Specifies the amount of padding to apply on input
            when `pad_mode` is set to ``"pad"``. It can be a single int or a tuple of 4 ints.
            If `pad` is one integer, the paddings of top, bottom, left and right are the same, equal to `pad`.
            If `pad` is a tuple with four integers, the paddings of top, bottom, left and right will be equal to
            pad[0], pad[1], pad[2], and pad[3] accordingly. Default: ``0`` .
        stride (Union(int, tuple[int]), optional): Specifies the stride of the convolution kernel's movement.
            It can be a single int or a tuple of two or four ints. A single int means the stride is the same in
            both the height and width directions. A tuple of two ints indicates the strides in the height and
            width directions, respectively. For a tuple of four ints, the two ints corresponding to the (N, C)
            dimensions are treated as 1, and the two corresponding to the (H, W) dimensions are the step sizes
            in the height and width directions respectively. Default: ``1`` .
        dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution.
            It can be a single int or a tuple of 2 or 4 integers. A single int means the dilation size is the same
            in both the height and width directions. A tuple of two ints represents the dilation size in
            the height and width directions, respectively. For a tuple of four ints, the two ints corresponding
            to the (N, C) dimensions are treated as 1, and the two corresponding to the (H, W) dimensions are the
            dilation sizes in the height and width directions respectively.
            Assuming :math:`dilation=(d0, d1)`, the convolutional kernel samples the input with a
            spacing of :math:`d0-1` elements in the height direction and :math:`d1-1` elements in the width direction.
            The values in the height and width dimensions are in the ranges [1, H] and [1, W], respectively.
            Default: ``1`` .
        group (int, optional): Specifies the number of groups dividing `x`'s input channel when applying
            group convolution. Default: ``1`` .
        data_format (str, optional): The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` .
            Default: ``"NCHW"``. (NHWC is only supported in GPU now.)

    Inputs:
        - **x** (Tensor) - Input tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` or
          :math:`(N, H_{in}, W_{in}, C_{in})` depending on `data_format` .
        - **weight** (Tensor) - The convolutional kernel value, it should have shape
          :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})` .

    Outputs:
        Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`
        or :math:`(N, H_{out}, W_{out}, C_{out})`.
        To see how different pad modes affect the output shape, please refer to
        :class:`mindspore.nn.Conv2d` for more details.

    Raises:
        TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
        TypeError: If `out_channel` or `group` is not an int.
        ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `pad_mode` is not one of ``'same'``, ``'valid'`` or ``'pad'``.
        ValueError: If `pad` is a tuple whose length is not equal to 4.
        ValueError: If `pad_mode` is not equal to ``'pad'`` and `pad` is not equal to ``(0, 0, 0, 0)``.
        ValueError: If `data_format` is neither ``'NHWC'`` nor ``'NCHW'`` .

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> # case 1: All parameters use default values.
        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3)
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 30, 30)
        >>> # case 2: pad_mode="pad", other parameters being default.
        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, pad_mode="pad", pad=(4, 10, 4, 10))
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 44, 44)
        >>> # case 3: stride=(2, 4), other parameters being default.
        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, stride=(2, 4))
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 15, 8)
        >>> # case 4: dilation=2, other parameters being default.
        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, dilation=2)
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 28, 28)
        >>> # case 5: group=2, other parameters being default.
        >>> x = Tensor(np.ones([10, 64, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, group=2)
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 30, 30)
        >>> # case 6: All parameters are specified.
        >>> x = Tensor(np.ones([10, 64, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, pad_mode="pad",
        ...                     pad=(4, 10, 4, 10), stride=(2, 4), dilation=2, group=2)
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 21, 11)
    """

    @prim_attr_register
    def __init__(self,
                 out_channel,
                 kernel_size,
                 mode=1,
                 pad_mode="valid",
                 pad=0,
                 stride=1,
                 dilation=1,
                 group=1,
                 data_format="NCHW"):
        """Initialize Conv2D"""
        self.init_prim_io_names(inputs=['x', 'w'], outputs=['output'])
        self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name)
        self.stride = _check_positive_int_or_tuple('stride', stride, self.name, allow_four=True, ret_four=True)
        self.add_prim_attr('stride', self.stride)
        self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name, allow_four=True, ret_four=True)
        self.add_prim_attr('dilation', self.dilation)
        validator.check_value_type('pad', pad, (int, tuple), self.name)
        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
        if isinstance(pad, int):
            pad = (pad,) * 4
        else:
            validator.check_equal_int(len(pad), 4, 'pad size', self.name)
        self.pad_mode = validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.name)

        if pad_mode != 'pad' and pad != (0, 0, 0, 0):
            raise ValueError(f"For '{self.name}', the 'pad' must be zero when 'pad_mode' is not 'pad', "
                             f"but got 'pad': {self.pad} and 'pad_mode': {self.pad_mode}.")
        self.add_prim_attr("pad", pad)
        self.padding = pad
        if self.pad_mode == 'pad':
            for item in pad:
                validator.check_non_negative_int(item, 'pad item', self.name)

        self.mode = validator.check_equal_int(mode, 1, 'mode', self.name)
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
                             f"but got the 'data_format' is {self.format} "
                             f"and platform is {context.get_context('device_target')}.")
        self.add_prim_attr('data_format', self.format)
        self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
        self.group = validator.check_positive_int(group, 'group', self.name)
        self.add_prim_attr('groups', self.group)
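
# A brief worked check of the Conv2D docstring examples above (illustrative only, using the
# standard convolution output-size arithmetic documented for mindspore.nn.Conv2d): with
# pad_mode="pad", H_out = floor((H_in + pad_top + pad_bottom - dilation * (k - 1) - 1) / stride) + 1.
# Case 2 (H_in=32, k=3, pad=(4, 10), stride=1, dilation=1): floor((32 + 14 - 2 - 1) / 1) + 1 = 44.
# Case 6 (stride=(2, 4), dilation=2): H_out = floor((32 + 14 - 4 - 1) / 2) + 1 = 21 and
# W_out = floor((32 + 14 - 4 - 1) / 4) + 1 = 11, matching the printed shapes.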


class DataFormatVecPermute(Primitive):
    r"""
    Converts the input tensor from the `src_format` to the `dst_format` by permuting its dimensions.

    Args:
        src_format (str, optional): the source data format, which can be ``'NHWC'`` or ``'NCHW'`` .
            Default: ``'NHWC'`` .
        dst_format (str, optional): the target data format, which can be ``'NHWC'`` or ``'NCHW'`` .
            Default: ``'NCHW'`` .

    Inputs:
        - **input_x** (Tensor) - A Tensor of shape :math:`(4, )` or :math:`(4, 2)` in source data format.
          Supports int32 and int64 datatype.

    Outputs:
        Tensor, has the same data type and shape as the `input_x`.

    Raises:
        TypeError: If `input_x` is not a Tensor.
        TypeError: If dtype of `input_x` is neither int32 nor int64.
        ValueError: If `src_format` or `dst_format` is not a str in ['NHWC', 'NCHW'].
        ValueError: If `input_x` shape is not :math:`(4, )` or :math:`(4, 2)`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, nn, ops
        >>> class Net(nn.Cell):
        ...     def __init__(self, src_format="NHWC", dst_format="NCHW"):
        ...         super().__init__()
        ...         self.op = ops.DataFormatVecPermute(src_format, dst_format)
        ...     def construct(self, x):
        ...         return self.op(x)
        ...
        >>> net = Net()
        >>> x = Tensor(np.array([1, 2, 3, 4]).astype(np.int32))
        >>> output = net(x)
        >>> print(output)
        [1 4 2 3]
    """

    @prim_attr_register
    def __init__(self, src_format='NHWC', dst_format='NCHW'):
        """Initialize DataFormatVecPermute."""
        valid_values = ['NHWC', 'NCHW']
        self.src_format = validator.check_string(src_format, valid_values, "src_format", self.name)
        self.dst_format = validator.check_string(dst_format, valid_values, "dst_format", self.name)
        self.init_prim_io_names(inputs=['input_x'], outputs=['output'])


class DepthwiseConv2dNative(PrimitiveWithInfer):
    r"""
    DepthwiseConv2dNative will be deprecated in the future. Please use :class:`mindspore.nn.Conv2d` instead.

    Supported Platforms:
        Deprecated
    """

    @prim_attr_register
    def __init__(self,
                 channel_multiplier,
                 kernel_size,
                 mode=3,
                 pad_mode="valid",
                 pad=0,
                 stride=1,
                 dilation=1,
                 group=1):
        """Initialize DepthwiseConv2dNative"""
        logger.warning("WARN_DEPRECATED: The usage of DepthwiseConv2dNative is deprecated."
1072 " Please use nn.Conv2D.") 1073 self.init_prim_io_names(inputs=['x', 'w'], outputs=['output']) 1074 self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name) 1075 self.stride = _check_positive_int_or_tuple('stride', stride, self.name) 1076 if self.stride[0] != self.stride[1]: 1077 raise ValueError("The height and width of 'stride' must be equal," 1078 f"but got height:{self.stride[0]}, width:{self.stride[1]}") 1079 self.add_prim_attr('stride', (1, 1, self.stride[0], self.stride[1])) 1080 1081 self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name) 1082 if self.dilation[0] != self.dilation[1]: 1083 raise ValueError("The height and width of 'dilation' must be equal," 1084 f"but got height:{self.dilation[0]}, width:{self.dilation[1]}") 1085 self.add_prim_attr('dilation', (1, 1, self.dilation[0], self.dilation[1])) 1086 validator.check_value_type('pad', pad, (int, tuple), self.name) 1087 validator.check_value_type('pad_mode', pad_mode, [str], self.name) 1088 if isinstance(pad, int): 1089 pad = (pad,) * 4 1090 else: 1091 validator.check_equal_int(len(pad), 4, 'pad size', self.name) 1092 self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name) 1093 if pad_mode != 'pad' and pad != (0, 0, 0, 0): 1094 raise ValueError(f"For '{self.name}', the 'pad' must be zero or (0, 0, 0, 0) when 'pad_mode' " 1095 f"is not \"pad\", but got 'pad' is {self.pad} and 'pad_mode' is {pad_mode}.") 1096 self.add_prim_attr("pad", pad) 1097 self.padding = pad 1098 if self.pad_mode == 'pad': 1099 for item in pad: 1100 validator.check_non_negative_int(item, 'pad item', self.name) 1101 self.mode = validator.check_equal_int(mode, 3, "mode", self.name) 1102 self.add_prim_attr('data_format', "NCHW") 1103 self.channel_multiplier = validator.check_positive_int(channel_multiplier, "channel_multiplier", self.name) 1104 self.group = validator.check_positive_int(group, "group", self.name) 1105 self.add_prim_attr('offset_a', 0) 1106 1107 def infer_shape(self, x_shape, w_shape, b_shape=None): 1108 validator.check_equal_int(len(w_shape), 4, "weight rank", self.name) 1109 validator.check_equal_int(len(x_shape), 4, "x rank", self.name) 1110 validator.check("x_shape[1]", x_shape[1], "w_shape[1]", w_shape[1], validator.EQ, self.name) 1111 validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]), validator.EQ, self.name) 1112 1113 kernel_size_n, _, kernel_size_h, kernel_size_w = w_shape 1114 _, _, stride_h, stride_w = self.stride 1115 _, _, dilation_h, dilation_w = self.dilation 1116 if kernel_size_n != 1: 1117 raise ValueError(f"For '{self.name}', the batch of 'weight' must be 1, but got {kernel_size_n}") 1118 if self.pad_mode == "valid": 1119 h_out = math.ceil((x_shape[2] - dilation_h * (kernel_size_h - 1)) / stride_h) 1120 w_out = math.ceil((x_shape[3] - dilation_w * (kernel_size_w - 1)) / stride_w) 1121 pad_top, pad_bottom, pad_left, pad_right = 0, 0, 0, 0 1122 elif self.pad_mode == "same": 1123 h_out = math.ceil(x_shape[2] / stride_h) 1124 w_out = math.ceil(x_shape[3] / stride_w) 1125 1126 pad_needed_h = max(0, (h_out - 1) * stride_h + dilation_h * (kernel_size_h - 1) + 1 - x_shape[2]) 1127 pad_top = math.floor(pad_needed_h / 2) 1128 pad_bottom = pad_needed_h - pad_top 1129 1130 pad_needed_w = max(0, (w_out - 1) * stride_w + dilation_w * (kernel_size_w - 1) + 1 - x_shape[3]) 1131 pad_left = math.floor(pad_needed_w / 2) 1132 pad_right = pad_needed_w - pad_left 1133 elif self.pad_mode == 'pad': 1134 pad_top, 

            h_out = 1 + (x_shape[2] + pad_top + pad_bottom - kernel_size_h - (kernel_size_h - 1) * (dilation_h - 1)) \
                / stride_h
            w_out = 1 + (x_shape[3] + pad_left + pad_right - kernel_size_w - (kernel_size_w - 1) * (dilation_w - 1)) \
                / stride_w
            h_out = math.floor(h_out)
            w_out = math.floor(w_out)

        self.pad_list = (pad_top, pad_bottom, pad_left, pad_right)
        self.add_prim_attr('pad_list', self.pad_list)

        out_channel = self.channel_multiplier * x_shape[1]
        out_shape = [x_shape[0], out_channel, h_out, w_out]
        return out_shape

    def infer_dtype(self, x_dtype, w_dtype, b_dtype=None):
        args = {'x': x_dtype, 'w': w_dtype}
        validator.check_tensors_dtypes_same_and_valid(args, mstype.number_type, self.name)
        if x_dtype.element_type() == mstype.int8:
            return mstype.TensorType(mstype.int32)
        return x_dtype


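# Worked example of the "same" padding arithmetic in DepthwiseConv2dNative.infer_shape above
# (a sketch, not executed here): for an input height of 32 with kernel 3, stride 2 and dilation 1,
# h_out = ceil(32 / 2) = 16 and pad_needed_h = max(0, (16 - 1) * 2 + 1 * (3 - 1) + 1 - 32) = 1,
# so pad_top = 0 and pad_bottom = 1.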
class _Pool(PrimitiveWithInfer):
    r"""
    Performs max/avg pooling operation.

    Args:
        kernel_size (Union[int, tuple[int]]): The size of the kernel, an int or a tuple
            of two ints for height and width. Default: ``1`` .
        strides (Union[int, tuple[int]]): The stride of the window, an int or a tuple
            of two ints for height and width. Default: ``1`` .
        pad_mode (str): The optional value for pad mode, is ``"same"`` or ``"valid"`` .
            Default: ``"valid"`` .
        data_format (str): The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` .
            Default: ``"NCHW"`` .
    """

    @prim_attr_register
    def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
        """Initialize _Pool."""
        self.init_prim_io_names(inputs=['x'], outputs=['output'])
        validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name)
        validator.check_value_type('strides', strides, [int, tuple], self.name)
        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.name)
        self.add_prim_attr("pad_mode", self.pad_mode)
        self.is_maxpoolwithargmax = (self.name == "MaxPoolWithArgmax")
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
                             f"but got the 'data_format' is {self.format} and "
                             f"the platform is {context.get_context('device_target')}.")
        if not self.is_maxpoolwithargmax:
            self.add_prim_attr('data_format', self.format)

        self.kernel_size = _check_positive_int_or_tuple(
            "kernel_size", kernel_size, self.name, allow_four=False, ret_four=True)
        if self.is_maxpoolwithargmax:
            self.kernel_size = (1, self.kernel_size[-2], self.kernel_size[-1], 1)
        self.add_prim_attr("kernel_size", self.kernel_size)

        self.strides = _check_positive_int_or_tuple("strides", strides, self.name, allow_four=False, ret_four=True)
        if self.is_maxpoolwithargmax:
            self.strides = (1, self.strides[-2], self.strides[-1], 1)
        self.add_prim_attr("strides", self.strides)

    def infer_shape(self, x_shape):
        x_shape_norm = x_shape if self.format == "NCHW" else [x_shape[0], x_shape[3], x_shape[1], x_shape[2]]
        validator.check_equal_int(len(x_shape_norm), 4, "x rank", self.name)
        batch, channel, input_h, input_w = x_shape_norm
        if self.is_maxpoolwithargmax:
            _, kernel_h, kernel_w, _ = self.kernel_size
            _, stride_h, stride_w, _ = self.strides
        else:
            _, _, kernel_h, kernel_w = self.kernel_size
            _, _, stride_h, stride_w = self.strides

        if self.pad_mode == "VALID":
            if input_h == -1:
                out_h = -1
            else:
                out_h = math.ceil((input_h - (kernel_h - 1)) / stride_h)
            if input_w == -1:
                out_w = -1
            else:
                out_w = math.ceil((input_w - (kernel_w - 1)) / stride_w)
        elif self.pad_mode == "SAME":
            if input_h == -1:
                out_h = -1
            else:
                out_h = math.ceil(input_h / stride_h)
            if input_w == -1:
                out_w = -1
            else:
                out_w = math.ceil(input_w / stride_w)
        out_shape = [batch, channel, out_h, out_w] if self.format == "NCHW" else [batch, out_h, out_w, channel]

        is_dynamic_shape = False
        for in_shape_val in x_shape_norm:
            if in_shape_val == -1:
                is_dynamic_shape = True

        for out_shape_val in out_shape:
            if out_shape_val <= 0 and not is_dynamic_shape:
                raise ValueError(f"For '{self.name}', each element of the output shape must be larger than 0, "
                                 f"but got output shape: {out_shape}. The input shape: {x_shape}, "
                                 f"kernel size: {self.kernel_size}, strides: {self.strides}. "
                                 f"Please check the official api documents for "
                                 f"more information about the output.")
        return out_shape

    def infer_dtype(self, x_dtype):
        validator.check_subclass("input", x_dtype, mstype.tensor_type, self.name)
        return x_dtype


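# Illustrative output-size arithmetic for the pooling primitives below (a sketch based on
# _Pool.infer_shape above): with "VALID" padding, out = ceil((L - (k - 1)) / s); with "SAME"
# padding, out = ceil(L / s). For example, a 3x4 plane pooled with a 2x2 kernel and stride 1
# in "VALID" mode yields a 2x3 output, which matches the MaxPool example below.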
class MaxPool(_Pool):
    r"""
    Max pooling operation.

    Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes.

    Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs
    regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
    :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows:

    .. math::
        \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)

    Args:
        kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
            is an int number that represents height and width of the kernel, or a tuple
            of two int numbers that represent height and width respectively. Default: ``1`` .
        strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            not only the height of movement but also the width of movement, or a tuple of two int numbers that
            represent height and width of movement respectively. Default: ``1`` .
        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
            ``'same'`` or ``'valid'`` . Default: ``'valid'`` .

            - ``'same'``: Pad the input around its edges so that the shape of input and output
              are the same when `stride` is set to ``1``.
              The amount of padding is calculated by the operator internally. If the amount is even, it is
              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
            - ``'valid'``: No padding is applied to the input, and the output returns the maximum
              possible height and width. Extra pixels that could not complete a full stride will
              be discarded.

        data_format (str) : The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` .
            Default: ``'NCHW'`` .

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
          Supported dtypes:

          - CPU: float16, float32, float64.
          - GPU/Ascend: float16, float32.

    Outputs:
        Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `kernel_size` or `strides` is neither int nor tuple.
        ValueError: If `pad_mode` is neither ``'valid'`` nor ``'same'``, case-insensitive.
        ValueError: If `data_format` is neither ``'NCHW'`` nor ``'NHWC'``.
        ValueError: If `kernel_size` or `strides` is less than 1.
        ValueError: If length of shape of `input` is not equal to 4.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
        >>> maxpool_op = ops.MaxPool(pad_mode="VALID", kernel_size=2, strides=1)
        >>> output = maxpool_op(x)
        >>> print(output)
        [[[[ 5. 6. 7.]
           [ 9. 10. 11.]]
          [[17. 18. 19.]
           [21. 22. 23.]]
          [[29. 30. 31.]
           [33. 34. 35.]]]]
    """

    @prim_attr_register
    def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
        """Initialize MaxPool."""
        super(MaxPool, self).__init__(kernel_size, strides, pad_mode, data_format)


class MaxPoolV1(Primitive):
    r"""
    Maxpooling operation.

    Applies a 2D maxpooling over an input Tensor which can be regarded as a composition of 2D planes.

    Typically, the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPoolV1
    outputs regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
    :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_h, s_w)`, the operation is as follows.

    .. math::
        \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
        \text{input}(N_i, C_j, s_h \times h + m, s_w \times w + n)

    Args:
        kernel_size (Union[int, tuple[int]]): The size of kernel used to take the max value,
            is an integer that represents height and width of the kernel, or a tuple
            of two integers that represent height and width respectively. Default: ``1`` .
        strides (Union[int, tuple[int]]): The distance of kernel moving, an integer that represents
            both the height and width of movement, or a tuple of two integers that
            represent height and width of movement, respectively. Default: ``1`` .
        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .

            - ``"same"``: Pad the input around its edges so that the shape of input and output
              are the same when `stride` is set to ``1``.
              The amount of padding is calculated by the operator internally. If the amount is even, it is
              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
              possible height and width. Extra pixels that could not complete a full stride will
              be discarded.

        data_format (str) : The optional value for data format, is ``'NCHW'`` or ``'NHWC'`` .
1362 Default: ``'NCHW'`` . 1363 1364 Inputs: 1365 - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. 1366 1367 Outputs: 1368 Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`. 1369 1370 Raises: 1371 TypeError: If `kernel_size` or `strides` is neither int nor tuple. 1372 ValueError: If `pad_mode` is neither 'valid' nor 'same' with not case sensitive. 1373 ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'. 1374 ValueError: If `kernel_size` or `strides` is less than 1. 1375 ValueError: If the length of shape of `input` is not equal to 4. 1376 1377 Supported Platforms: 1378 ``Ascend`` 1379 1380 Examples: 1381 >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32) 1382 >>> maxpoolv1_op = ops.MaxPoolV1(pad_mode="VALID", kernel_size=2, strides=1) 1383 >>> output_ = maxpoolv1_op(x) 1384 >>> print(output_) 1385 [[[[ 5. 6. 7.] 1386 [ 9. 10. 11.]] 1387 [[17. 18. 19.] 1388 [21. 22. 23.]] 1389 [[29. 30. 31.] 1390 [33. 34. 35.]]]] 1391 """ 1392 1393 @prim_attr_register 1394 def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"): 1395 """Initialize MaxPoolV1.""" 1396 self.init_prim_io_names(inputs=['x'], outputs=['output']) 1397 validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name) 1398 validator.check_value_type('strides', strides, [int, tuple], self.name) 1399 validator.check_value_type('pad_mode', pad_mode, [str], self.name) 1400 self.pad_mode = validator.check_string( 1401 pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.name) 1402 self.add_prim_attr("pad_mode", self.pad_mode) 1403 self.format = validator.check_string( 1404 data_format, ['NCHW', 'NHWC'], 'format', self.name) 1405 self.add_prim_attr('data_format', self.format) 1406 1407 self.kernel_size = _check_positive_int_or_tuple( 1408 "kernel_size", kernel_size, self.name, allow_four=False, ret_four=True) 1409 self.strides = _check_positive_int_or_tuple( 1410 "strides", strides, self.name, allow_four=False, ret_four=True) 1411 1412 kernel_size_adapted = self.kernel_size if self.format == 'NCHW' else ( 1413 self.kernel_size[0], self.kernel_size[2], self.kernel_size[3], self.kernel_size[1]) 1414 strides_adapted = self.strides if self.format == 'NCHW' else ( 1415 self.strides[0], self.strides[2], self.strides[3], self.strides[1]) 1416 1417 self.add_prim_attr("kernel_size", kernel_size_adapted) 1418 self.add_prim_attr("strides", strides_adapted) 1419 1420 1421class MaxPool3D(Primitive): 1422 r""" 1423 Applies a 3D max pooling over an input Tensor which can be regarded as a composition of 3D planes. 1424 1425 Typically the input is of shape :math:`(N_{in}, C_{in}, D_{in}, H_{in}, W_{in})`, MaxPool outputs 1426 regional maximum in the :math:`(D_{in}, H_{in}, W_{in})`-dimension. Given kernel size 1427 :math:`ks = (d_{ker}, h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1, s_2)`, the operation is as follows: 1428 1429 .. math:: 1430 \text{output}(N_i, C_j, d, h, w) = 1431 \max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1} 1432 \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n) 1433 1434 Args: 1435 kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value, 1436 is an int number that represents depth, height and width of the kernel, or a tuple 1437 of three int numbers that represent depth, height and width respectively. Default: ``1`` . 
        strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the depth, height and width of movement, or a tuple of three int numbers that
            represent depth, height and width of movement respectively. Default: ``1`` .
        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
            ``"SAME"`` , ``"VALID"`` or ``"PAD"`` . Default: ``"VALID"`` .

            - ``"SAME"``: Pad the input around its depth/height/width dimension so that the shape of input and output
              are the same when `stride` is set to ``1``.
              The amount of padding is calculated by the operator internally. If the amount is even,
              it is uniformly distributed around the input; if it is odd, the excess amount goes
              to the front/right/bottom side.
              If this mode is set, `pad_list` must be 0.
            - ``"VALID"``: No padding is applied to the input, and the output returns the maximum
              possible depth, height and width. Extra pixels that could not complete a full stride will
              be discarded. If this mode is set, `pad_list` must be 0.
            - ``"PAD"``: Pad the input with a specified amount. In this mode, the amount of padding
              in the depth, height and width dimension is determined by the `pad_list` parameter.
              If this mode is set, `pad_list` must be greater than or equal to 0.

        pad_list (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` . If `pad_list` is an integer,
            the paddings of head, tail, top, bottom, left and right are the same, equal to `pad_list`. If `pad_list`
            is a tuple of six integers, the padding of head, tail, top, bottom, left and right equals pad_list[0],
            pad_list[1], pad_list[2], pad_list[3], pad_list[4] and pad_list[5] correspondingly.
        ceil_mode (Union[bool, None]): Whether to use ceil instead of floor to calculate output shape.
            Only effective in "pad" mode.
            When `pad_mode` is ``"pad"`` and `ceil_mode` is ``None`` , `ceil_mode` will be set to ``False``.
            Default: ``None`` .
        data_format (str) : The optional value for data format. Currently only supports ``"NCDHW"`` .
            Default: ``"NCDHW"`` .

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`.
          Data type must be float16, float32 or float64.

    Outputs:
        Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the data type of `x`.

    Raises:
        TypeError: If `kernel_size` or `strides` is neither an int nor a tuple.
        TypeError: If `pad_mode` or `data_format` is not a string.
        ValueError: If numbers in `kernel_size` or `strides` are not positive.
        ValueError: If `pad_mode` is not one of ``"SAME"``, ``"VALID"`` or ``"PAD"``.
        ValueError: If `pad_mode` is ``"SAME"`` or ``"VALID"`` and `ceil_mode` is not ``None``.
        ValueError: If `kernel_size` or `strides` is a tuple whose length is not equal to 3.
        ValueError: If `data_format` is not ``"NCDHW"``.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float32)
        >>> max_pool3d = ops.MaxPool3D(kernel_size=2, strides=1, pad_mode="VALID")
        >>> output = max_pool3d(x)
        >>> print(output)
        [[[[[10. 11.]]]
          [[[22. 23.]]]]]
    """

    @prim_attr_register
    def __init__(self, kernel_size=1, strides=1, pad_mode="VALID", pad_list=0, ceil_mode=None, data_format="NCDHW"):
        """Initialize MaxPool3D."""
        self.init_prim_io_names(inputs=['x'], outputs=['output'])
        validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name)
        validator.check_value_type('strides', strides, [int, tuple], self.name)
        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME', 'PAD'], 'pad_mode', self.name)
        if pad_mode.upper() == "PAD":
            self.pad_mode = "CALCULATED"
        self.add_prim_attr("pad_mode", self.pad_mode)
        self.data_format = validator.check_string(data_format, ['NCDHW'], 'data_format', self.name)
        self.kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.name, ret_five=True)
        self.add_prim_attr("kernel_size", self.kernel_size)
        self.strides = _check_3d_int_or_tuple("strides", strides, self.name, ret_five=True)
        self.add_prim_attr("strides", self.strides)
        if ceil_mode is None:
            self.ceil_mode = False
        else:
            self.ceil_mode = validator.check_value_type('ceil_mode', ceil_mode, [bool], self.name)
            if self.pad_mode != "CALCULATED":
                raise ValueError("When 'pad_mode' is 'same' or 'valid', 'ceil_mode' only supports 'None'.")
        self.add_prim_attr("ceil_mode", int(self.ceil_mode))

        validator.check_value_type('pad_list', pad_list, (int, tuple), self.name)
        self.pad_list = pad_list
        if isinstance(self.pad_list, int):
            self.pad_list = (self.pad_list,) * 6
        if len(self.pad_list) == 3:
            self.pad_list = (pad_list[0], pad_list[0], pad_list[1], pad_list[1], pad_list[2], pad_list[2])
        if len(self.pad_list) != 3 and len(self.pad_list) != 6:
            raise ValueError(f"For '{self.name}', attr 'pad_list' must be a positive int number or a tuple of "
                             f"three or six positive int numbers, but got {len(self.pad_list)} numbers.")
        if self.pad_mode != 'CALCULATED' and self.pad_list != (0, 0, 0, 0, 0, 0):
            raise ValueError(f"For '{self.name}', the 'pad_list' must be zero or (0, 0, 0, 0, 0, 0) when 'pad_mode' "
                             f"is not \"pad\", but got 'pad_list' is {pad_list} and 'pad_mode' is {pad_mode}.")
        if self.pad_mode == 'CALCULATED':
            for item in self.pad_list:
                validator.check_non_negative_int(item, 'pad_list item', self.name)
        self.add_prim_attr("pad_list", self.pad_list)


class MaxUnpool2D(Primitive):
    r"""
    Calculates the partial inverse of MaxPool2D operation.

    Since MaxPool2D loses non-maximal values, it is not fully invertible.
    Therefore, MaxUnpool2D takes the output of MaxPool2D, including the indices of
    the maximal values, and computes a partial inverse where all non-maximal values are set to zero.
    Typically the input is of shape :math:`(N, C, H_{in}, W_{in})` ,
    the output is of shape :math:`(N, C, H_{out}, W_{out})` , the operation is as follows:

    .. math::
        \begin{array}{ll} \\
            H_{out} = (H_{in} - 1) \times strides[0] - 2 \times pads[0] + ksize[0] \\
            W_{out} = (W_{in} - 1) \times strides[1] - 2 \times pads[1] + ksize[1] \\
        \end{array}

    .. warning::
        This is an experimental API that is subject to change or deletion.
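
    Note:
        As an illustration of the output size formulas above (the values here are chosen only for this example),
        with `ksize=(2, 2)`, `strides=(2, 2)` and `pads=(0, 0)`, an input with :math:`H_{in} = 3` gives
        :math:`H_{out} = (3 - 1) \times 2 - 2 \times 0 + 2 = 6`, and :math:`W_{out}` is computed in the same way.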
1559 1560 Args: 1561 ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value, 1562 is an int number that represents height and width of the kernel, or a tuple 1563 of two int numbers that represent height and width respectively. 1564 strides (Union[int, tuple[int]], optional): The strides of kernel moving. 1565 If `strides` is 0 or (0, 0), then `strides` equal to `ksize` . Default: ``0`` . 1566 1567 - An int number that represents the height and width of movement are both `strides` . 1568 - A tuple of two int numbers that represent height and width of movement respectively. 1569 1570 pads (Union[int, tuple[int]], optional): The pad value to be filled. Default: ``0`` . 1571 1572 - If `pads` is an integer, the paddings of height and width are the same, equal to pads. 1573 - If `pads` is a tuple of two integers, the padding of height and width equal to pads[0] 1574 and pads[1] correspondingly. 1575 1576 output_shape (tuple[int], optional): The target output size is an optional input. Default: ``()`` . 1577 1578 - If :math:`output\_shape == ()` , then the shape of output computed by `kszie`, `strides` and `pads` . 1579 - If :math:`output\_shape != ()` , then `output_shape` must be :math:`(N, C, H, W)` or :math:`(N, H, W, C)` 1580 and `output_shape` must belong to :math:`[(N, C, H_{out} - strides[0], W_{out} - strides[1]), 1581 (N, C, H_{out} + strides[0], W_{out} + strides[1])]`. 1582 1583 data_format (str, optional): The optional value for data format. 1584 Currently support ``"NCHW"`` and ``"NHWC"`` . Default: ``"NCHW"`` . 1585 1586 Inputs: 1587 - **x** (Tensor) - The input Tensor to invert. 1588 Tensor of shape :math:`(N, C, H_{in}, W_{in})` or :math:`(N, H_{in}, W_{in}, C)`. 1589 - **argmax** (Tensor) - Max values' index represented by the `argmax`. 1590 Tensor of shape must be same with input `x`. 1591 Values of `argmax` must belong to :math:`[0, H_{in} \times W_{in} - 1]`. 1592 Data type must be in int32 or int64. 1593 1594 Outputs: 1595 Tensor, with shape :math:`(N, C, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C)`. 1596 Has the same data type with `x`. 1597 1598 Raises: 1599 TypeError: If data type of `x` or `argmax` is not supported. 1600 TypeError: If `ksize`, `strides` or `pads` is neither int nor tuple. 1601 ValueError: If numbers in `strides` (also support 0 and (0, 0)) or `ksize` is not positive. 1602 ValueError: If numbers in `pads` is negative. 1603 ValueError: If `ksize`, `strides` or `pads` is a tuple whose length is not equal to 2. 1604 ValueError: If `data_format` is not a str or is neither `NCHW` nor `NHWC`. 1605 ValueError: If `output_shape` whose length is neither 0 or 4. 1606 ValueError: If `output_shape` is not close to output size 1607 computed by attr `ksize`, `strides` and `pads`. 1608 1609 Supported Platforms: 1610 ``Ascend`` ``GPU`` ``CPU`` 1611 1612 Examples: 1613 >>> import numpy as np 1614 >>> from mindspore import Tensor, ops 1615 >>> x = Tensor(np.array([[[[0, 1], [8, 9]]]]).astype(np.float32)) 1616 >>> argmax = Tensor(np.array([[[[0, 1], [2, 3]]]]).astype(np.int64)) 1617 >>> maxunpool2d = ops.MaxUnpool2D(ksize=1, strides=1, pads=0) 1618 >>> output = maxunpool2d(x, argmax) 1619 >>> print(output.asnumpy()) 1620 [[[[0. 1.] 1621 [8. 
 9.]]]]
    """

    @prim_attr_register
    def __init__(self, ksize, strides=0, pads=0, output_shape=(), data_format="NCHW"):
        """Initialize MaxUnpool2D."""
        self.init_prim_io_names(inputs=['x', 'argmax'], outputs=['y'])
        self.ksize = _check_positive_int_or_tuple('ksize', ksize, self.name, ret_four=True)
        if strides in (0, (0, 0)):
            strides = ksize
        self.strides = _check_positive_int_or_tuple('strides', strides, self.name, ret_four=True)
        self.pads = _check_positive_int_or_tuple('pads', pads, self.name, ret_four=True, strict_positive=False)
        self.data_format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'data_format', self.name)

        if data_format == "NHWC":
            self.ksize = (self.ksize[0], self.ksize[2], self.ksize[3], self.ksize[1])
            self.strides = (self.strides[0], self.strides[2], self.strides[3], self.strides[1])
            self.pads = (self.pads[0], self.pads[2], self.pads[3], self.pads[1])

        self.add_prim_attr('ksize', self.ksize)
        self.add_prim_attr('strides', self.strides)
        self.add_prim_attr('pads', self.pads)

        validator.check_value_type("output_shape", output_shape, [tuple], self.name)
        self.output_shape = output_shape


class MaxUnpool3D(Primitive):
    r"""
    Computes the inverse of :class:`mindspore.ops.MaxPool3D`.

    MaxUnpool3D keeps the maximal values and sets all positions of non-maximal values to zero.
    Typically the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`, the output is of
    shape :math:`(N, C, D_{out}, H_{out}, W_{out})`, the operation is as follows.

    .. math::
        \begin{array}{ll} \\
            D_{out} = (D_{in} - 1) \times strides[0] - 2 \times pads[0] + ksize[0] \\
            H_{out} = (H_{in} - 1) \times strides[1] - 2 \times pads[1] + ksize[1] \\
            W_{out} = (W_{in} - 1) \times strides[2] - 2 \times pads[2] + ksize[2] \\
        \end{array}

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Args:
        ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
            is an int number that represents depth, height and width of the kernel, or a tuple
            of three int numbers that represent depth, height and width respectively.
        strides (Union[int, tuple[int]], optional): The distance of kernel moving. Default: ``0`` .

            - If it is an int number, the depth, height and width of movement are all equal to `strides`.
            - If it is a tuple of three int numbers, they represent depth, height and width of movement respectively.
            - If strides is 0 or (0, 0, 0), then `strides` is equal to `ksize`.

        pads (Union[int, tuple[int]], optional): The pad value to be filled. Default: ``0`` .

            - If `pads` is an integer, the paddings of depth, height and width are the same, equal to pads.
            - If `pads` is a tuple of three integers, the padding of depth, height and width equal to pads[0],
              pads[1] and pads[2] correspondingly.

        output_shape (tuple[int], optional) : The target output size. Default: ``()`` .
            If :math:`output\_shape == ()`, then the shape of the output is computed from `ksize`, `strides` and
            `pads` as shown above.
            If :math:`output\_shape != ()`, then `output_shape` must be of format :math:`(N, C, D, H, W)` or
            :math:`(N, D, H, W, C)` and `output_shape` must be in range
            :math:`[(N, C, D_{out} - strides[0], H_{out} - strides[1], W_{out} - strides[2]),
            (N, C, D_{out} + strides[0], H_{out} + strides[1], W_{out} + strides[2])]`.
1688 data_format (str, optional) : The optional value for data format. Currently 1689 support ``'NCDHW'`` and ``'NDHWC'`` . Default: ``'NCDHW'`` . 1690 1691 Inputs: 1692 - **x** (Tensor) - The input Tensor to invert. 1693 Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(N, D_{in}, H_{in}, W_{in}, C)`. 1694 - **argmax** (Tensor) - Max values' index. Tensor that has the same shape as `x`. 1695 Values of `argmax` must be in range :math:`[0, D_{in} \times H_{in} \times W_{in} - 1]`. 1696 Data type must be int32 or int64. 1697 1698 Outputs: 1699 Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(N, D_{out}, H_{out}, W_{out}, C)`. 1700 Has the same data type with `x`. 1701 1702 Raises: 1703 TypeError: If data type of `x` or `argmax` is Number. 1704 TypeError: If `ksize`, `strides` or `pads` is neither int nor tuple. 1705 ValueError: If numbers in `strides` or `ksize` is negative. 1706 ValueError: If numbers in `pads` is negative. 1707 ValueError: If `ksize`, `strides` or `pads` is a tuple whose length is not equal to 3. 1708 ValueError: If `data_format` is not a str or is neither ``'NCDHW'`` nor ``'NDHWC'``. 1709 ValueError: If `output_shape` whose length is neither 0 or 5. 1710 ValueError: If `output_shape` is not close to output size range 1711 computed by attr `ksize, strides, pads`. 1712 1713 Supported Platforms: 1714 ``Ascend`` ``GPU`` ``CPU`` 1715 1716 Examples: 1717 >>> import numpy as np 1718 >>> from mindspore import Tensor, ops 1719 >>> x = Tensor(np.array([[[[[0, 1], [8, 9]]]]]).astype(np.float32)) 1720 >>> argmax = Tensor(np.array([[[[[0, 1], [2, 3]]]]]).astype(np.int64)) 1721 >>> maxunpool3d = ops.MaxUnpool3D(ksize=1, strides=1, pads=0) 1722 >>> output = maxunpool3d(x, argmax) 1723 >>> print(output.asnumpy()) 1724 [[[[[0. 1.] 1725 [8. 9.]]]]] 1726 """ 1727 1728 @prim_attr_register 1729 def __init__(self, ksize, strides=0, pads=0, output_shape=(), data_format="NCDHW"): 1730 """Initialize MaxUnpool3D.""" 1731 self.init_prim_io_names(inputs=['x', 'argmax'], outputs=['y']) 1732 self.ksize = _check_3d_int_or_tuple('ksize', ksize, self.name, ret_five=True) 1733 if strides in (0, (0, 0, 0)): 1734 strides = ksize 1735 self.strides = _check_3d_int_or_tuple('strides', strides, self.name, ret_five=True) 1736 self.pads = _check_3d_int_or_tuple('pads', pads, self.name, ret_five=True, greater_zero=False) 1737 self.data_format = validator.check_string(data_format, ['NCDHW', 'NDHWC'], 'data_format', self.name) 1738 if data_format == "NDHWC": 1739 self.ksize = (self.ksize[0], self.ksize[2], self.ksize[3], self.ksize[4], self.ksize[1]) 1740 self.strides = (self.strides[0], self.strides[2], self.strides[3], self.strides[4], self.strides[1]) 1741 self.pads = (self.pads[0], self.pads[2], self.pads[3], self.pads[4], self.pads[1]) 1742 1743 self.add_prim_attr('ksize', self.ksize) 1744 self.add_prim_attr('strides', self.strides) 1745 self.add_prim_attr('pads', self.pads) 1746 1747 validator.check_value_type("output_shape", output_shape, [tuple], self.name) 1748 self.output_shape = output_shape 1749 1750 1751class AvgPoolV1(Primitive): 1752 r""" 1753 Average-pooling operation. 1754 1755 Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D planes. 1756 Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, AvgPoolV1 outputs 1757 regional average in the :math:`(H_{in}, W_{in})`-dimension. Given window size 1758 :math:`ks = (h_{ker}, w_{ker})` and strides :math:`s = (s_0, s_1)`, the operation is as follows. 

    .. math::
        \text{output}(N_i, C_j, h, w) = \frac{1}{h_{ker} * w_{ker}} \sum_{m=0}^{h_{ker}-1} \sum_{n=0}^{w_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)

    .. warning::
        - Only single input and single output are supported.
        - Global average pooling is supported.
        - The height of "kernel_size" and the width of "kernel_size" are positive integers within the range [1, 255].
          ksize_h * ksize_w < 256.
        - Due to instruction restrictions, the values of "strides_h" and "strides_w" are
          positive integers within the range [1, 64).

    Args:
        kernel_size (Union[int, tuple[int]]): The size of the kernel used to take the average value,
            is an integer that represents height and width of the kernel, or a tuple
            of two integers that represent height and width respectively. Default: ``1`` .
        strides (Union[int, tuple[int]]): The distance of kernel moving, an integer that represents
            the height and width of movement are both strides, or a tuple of two integers that
            represent height and width of movement, respectively. Default: ``1`` .
        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .

            - ``"same"``: Pad the input around its edges so that the shape of input and output
              are the same when `stride` is set to ``1``.
              The amount of padding is calculated by the operator internally. If the amount is even, it is
              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
              possible height and width. Extra pixels that could not complete a full stride will
              be discarded.

        data_format (str): The format of input and output data. Should be ``'NHWC'`` or ``'NCHW'`` .
            Default: ``'NCHW'`` .

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `kernel_size` or `strides` is neither int nor tuple.
        ValueError: If `pad_mode` is neither 'valid' nor 'same' (case insensitive).
        ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.
        ValueError: If `kernel_size` or `strides` is less than 1.
        ValueError: If length of shape of `x` is not equal to 4.

    Supported Platforms:
        ``Ascend``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> x = Tensor(np.arange(1 * 2 * 4 * 4).reshape((1, 2, 4, 4)), mindspore.float64)
        >>> avgpoolv1_op = ops.AvgPoolV1(pad_mode="VALID", kernel_size=3, strides=1)
        >>> _output = avgpoolv1_op(x)
        >>> print(_output)
        [[[[ 5. 6.]
          [ 9. 10.]]
         [[21. 22.]
          [25.
26.]]]] 1818 """ 1819 1820 @prim_attr_register 1821 def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"): 1822 """Initialize AvgPoolV1.""" 1823 self.init_prim_io_names(inputs=['x'], outputs=['output']) 1824 validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name) 1825 validator.check_value_type('strides', strides, [int, tuple], self.name) 1826 validator.check_value_type('pad_mode', pad_mode, [str], self.name) 1827 self.pad_mode = validator.check_string( 1828 pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.name) 1829 self.add_prim_attr("pad_mode", self.pad_mode) 1830 self.format = validator.check_string( 1831 data_format, ['NCHW', 'NHWC'], 'format', self.name) 1832 self.add_prim_attr('data_format', self.format) 1833 self.kernel_size = _check_positive_int_or_tuple( 1834 "kernel_size", kernel_size, self.name, allow_four=False, ret_four=True) 1835 self.strides = _check_positive_int_or_tuple( 1836 "strides", strides, self.name, allow_four=False, ret_four=True) 1837 1838 # adapt data_format 1839 self.kernel_size_adapted = self.kernel_size if self.format == "NCHW" else ( 1840 self.kernel_size[0], self.kernel_size[2], self.kernel_size[3], self.kernel_size[1]) 1841 self.add_prim_attr("kernel_size", self.kernel_size_adapted) 1842 self.strides_adapted = self.strides if self.format == "NCHW" else ( 1843 self.strides[0], self.strides[2], self.strides[3], self.strides[1]) 1844 self.add_prim_attr("strides", self.strides_adapted) 1845 1846 1847class Conv2DBackpropInput(Primitive): 1848 r""" 1849 The Conv2DBackpropInput interface is deprecated, please refer to :class:`mindspore.ops.Conv2DTranspose` if you 1850 want to do unsampling. 1851 1852 Supported Platforms: 1853 Deprecated 1854 """ 1855 __mindspore_signature__ = ( 1856 sig.make_sig('out_backprop', dtype=sig.sig_dtype.T), 1857 sig.make_sig('filter', dtype=sig.sig_dtype.T1), 1858 sig.make_sig('input_sizes', dtype=sig.sig_dtype.T2) 1859 ) 1860 1861 @prim_attr_register 1862 def __init__(self, 1863 out_channel, 1864 kernel_size, 1865 pad_mode="valid", 1866 pad=0, 1867 pad_list=None, 1868 mode=1, 1869 stride=1, 1870 dilation=1, 1871 group=1, 1872 data_format="NCHW"): 1873 """Initialize Conv2DBackpropInput""" 1874 self.init_prim_io_names(inputs=['out_backprop', 'filter', 'input_sizes'], outputs=['output']) 1875 self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name) 1876 self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name) 1877 self.add_prim_attr('kernel_size', self.kernel_size) 1878 self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name) 1879 if context.get_context("device_target") != "GPU" and self.format == "NHWC": 1880 raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, " 1881 f"but got the 'data_format' is {self.format} and " 1882 f"the platform is {context.get_context('device_target')}.") 1883 self.add_prim_attr('data_format', self.format) 1884 self.stride = _check_positive_int_or_tuple('stride', stride, self.name, allow_four=True, ret_four=True) 1885 self.stride = _update_attr_by_format(self.stride, self.format) 1886 self.add_prim_attr('stride', self.stride) 1887 self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name, allow_four=True, ret_four=True) 1888 self.dilation = _update_attr_by_format(self.dilation, self.format) 1889 self.add_prim_attr('dilation', self.dilation) 1890 validator.check_value_type('pad', pad, (int, tuple), self.name) 
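        # The checks below normalize `pad` to a 4-tuple (top, bottom, left, right) and require it to
        # stay zero unless `pad_mode` is "pad"; non-negative values are enforced in "pad" mode.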
        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
        if isinstance(pad, int):
            pad = (pad,) * 4
        else:
            validator.check_equal_int(len(pad), 4, 'pad size', self.name)
        self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name)
        if pad_mode != 'pad' and pad != (0, 0, 0, 0):
            raise ValueError(f"For '{self.name}', the 'pad' must be zero or (0, 0, 0, 0) when 'pad_mode' "
                             f"is not \"pad\", but got 'pad' is {self.pad} and 'pad_mode' is {pad_mode}.")
        self.add_prim_attr("pad", pad)
        self.padding = pad
        if self.pad_mode == 'pad':
            for item in pad:
                validator.check_non_negative_int(item, 'pad item', self.name)

        pad_mode = pad_mode.upper()
        self.add_prim_attr('pad_mode', pad_mode)
        self.mode = validator.check_equal_int(mode, 1, 'mode', self.name)
        self.group = validator.check_positive_int(group, 'group', self.name)
        self.add_prim_attr('groups', self.group)
        if pad_list:
            for x in pad_list:
                if x != -1:
                    validator.check_non_negative_int(x, 'element of pad_list', self.name)
            self.pad_list = pad_list


class MaxPool3DWithArgmax(Primitive):
    r"""
    Performs a 3D max pooling on the input Tensor and returns both max values and indices.

    Typically the input is a Tensor with shape :math:`(N_{in}, C_{in}, D_{in}, H_{in}, W_{in})`, MaxPool3DWithArgmax
    outputs regional maximum in the :math:`(D_{in}, H_{in}, W_{in})`-dimension. Given `ksize`
    :math:`ks = (d_{ker}, h_{ker}, w_{ker})` and `strides` :math:`s = (s_0, s_1, s_2)`, the operation is as follows.

    .. math::
        \text{output}(N_i, C_j, d, h, w) =
        \max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)

    The output is a Tensor with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})` and its depth, height and
    width are:

    .. math::
        \begin{array}{ll} \\
            D_{out} = \frac{D_{in} + 2 \times \text{pads}[0] - \text{dilation}[0] \times (\text{ksize}[0] - 1) - 1}
            {\text{stride}[0]} + 1 \\
            H_{out} = \frac{H_{in} + 2 \times \text{pads}[1] - \text{dilation}[1] \times (\text{ksize}[1] - 1) - 1}
            {\text{stride}[1]} + 1 \\
            W_{out} = \frac{W_{in} + 2 \times \text{pads}[2] - \text{dilation}[2] \times (\text{ksize}[2] - 1) - 1}
            {\text{stride}[2]} + 1 \\
        \end{array}

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Args:
        ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg
            value, is an int number that represents depth, height and width of the kernel, or a tuple of
            three int numbers that represent depth, height and width respectively.
        strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents the depth,
            height and width of movement, or a tuple of three int numbers that
            represent depth, height and width of movement respectively.
        pads (Union[int, tuple[int]]): An int number that represents the same padding applied to the depth, height
            and width of the input, or a tuple of three int numbers that represent the depth, height and width of
            padding respectively.
        dilation (Union[int, tuple[int]]): Controls the spacing between the kernel points. Default: ``(1, 1, 1)`` .
        ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .
1958 data_format (str) : The optional value for data format. Currently only support ``'NCDHW'`` . 1959 Default: ``'NCDHW'`` . 1960 argmax_type (mindspore.dtype) : The dtype for argmax. Default: ``mstype.int64`` . 1961 1962 Inputs: 1963 - **x** (Tensor) - Tensor of shape :math:`(N_{in}, C_{in}, D_{in}, H_{in}, W_{in})` with data type of int8, 1964 int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32 or float64. 1965 1966 Outputs: 1967 Tuple of 2 Tensors, representing the maxpool result and where the max values are generated. 1968 1969 - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})`. 1970 It has the same data type as `x`. 1971 - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int32 or int64. 1972 1973 Raises: 1974 TypeError: If `x` is not a Tensor. 1975 ValueError: If length of shape of `x` is not equal to 5. 1976 TypeError: If `ksize` , `strides` , `pads` or `dilation` is not int or tuple. 1977 ValueError: If `ksize` or `strides` is less than 1. 1978 ValueError: If `pads` is less than 0. 1979 ValueError: If `data_format` is not ``'NCDHW'``. 1980 ValueError: If `argmax_type` is not mindspore.int64 or mindspore.int32. 1981 1982 Supported Platforms: 1983 ``Ascend`` ``GPU`` ``CPU`` 1984 1985 Examples: 1986 >>> import mindspore 1987 >>> import numpy as np 1988 >>> from mindspore import Tensor, ops 1989 >>> x = Tensor(np.arange(2 * 1 * 2 * 2 * 2).reshape((2, 1, 2, 2, 2)), mindspore.float32) 1990 >>> max_pool3d_with_arg_op = ops.MaxPool3DWithArgmax(ksize=2, strides=1, pads=1) 1991 >>> output_tensor, argmax = max_pool3d_with_arg_op(x) 1992 >>> print(output_tensor.shape) 1993 (2, 1, 3, 3, 3) 1994 >>> print(argmax.shape) 1995 (2, 1, 3, 3, 3) 1996 """ 1997 1998 @prim_attr_register 1999 def __init__(self, ksize, strides, pads, dilation=(1, 1, 1), ceil_mode=False, 2000 data_format='NCDHW', argmax_type=mstype.int64): 2001 """Initialize MaxPool3DWithArgmax.""" 2002 self.init_prim_io_names(inputs=['x'], outputs=['y', 'argmax']) 2003 validator.check_value_type('ceil_mode', ceil_mode, bool, self.name) 2004 validator.check_value_type('data_format', data_format, str, self.name) 2005 validator.check_value_type("argmax_type", argmax_type, [mstype.Type], self.name) 2006 argmax_type_valid_values = (mstype.int32, mstype.int64) 2007 validator.check_type_name( 2008 "argmax_type", argmax_type, argmax_type_valid_values, self.name) 2009 self.data_format = validator.check_string( 2010 data_format, ['NCDHW'], 'data_format', self.name) 2011 if argmax_type == mstype.int32: 2012 self.add_prim_attr('argmax_type', 'int32') 2013 elif argmax_type == mstype.int64: 2014 self.add_prim_attr('argmax_type', 'int64') 2015 else: 2016 raise ValueError(f"For '{self.name}', the 'argmax_type' must be mstype.int32 or mstype.int64, " 2017 f"but got {self.argmax_type}.") 2018 self.ksize = _check_3d_int_or_tuple("ksize", ksize, self.name, ret_five=False) 2019 self.add_prim_attr('ksize', self.ksize) 2020 self.strides = _check_3d_int_or_tuple("strides", strides, self.name, ret_five=False) 2021 self.add_prim_attr('strides', self.strides) 2022 self.pads = _check_3d_int_or_tuple("pads", pads, self.name, greater_zero=False, ret_five=False) 2023 self.add_prim_attr('pads', self.pads) 2024 self.dilation = _check_3d_int_or_tuple("dilation", dilation, self.name, allow_five=True, ret_five=False) 2025 self.add_prim_attr('dilation', self.dilation) 2026 2027 2028class Conv2DTranspose(Conv2DBackpropInput): 2029 """ 2030 Calculates a 2D transposed 
convolution, which can be regarded as Conv2d for the gradient of the input,
    also called deconvolution, although it is not an actual deconvolution: it cannot fully restore
    the original input data, but it can restore the shape of the original input.

    Args:
        out_channel (int): The dimensionality of the output space.
        kernel_size (Union[int, tuple[int]]): The size of the convolution window.
        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .

            - ``"same"``: Pad the input around its edges so that the shape of input and output
              are the same when `stride` is set to ``1``.
              The amount of padding is calculated by the operator internally. If the amount is even, it is
              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
              If this mode is set, `pad` must be 0.
            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
              possible height and width. Extra pixels that could not complete a full stride will
              be discarded. If this mode is set, `pad` must be 0.
            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
              in the height and width directions is determined by the `pad` parameter.
              If this mode is set, `pad` must be greater than or equal to 0.

            Please refer to :class:`mindspore.nn.Conv2dTranspose` for more specifications about `pad_mode`.
        pad (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` . If `pad` is an integer, the paddings
            of top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers,
            the padding of top, bottom, left and right equal to pad[0], pad[1], pad[2], and pad[3]
            correspondingly.
        pad_list (Union[str, None]): The pad list like (top, bottom, left, right). Default: ``None`` .
        mode (int): Modes for different convolutions. The value is currently not used. Default: ``1`` .
        stride (Union[int, tuple[int]]): The stride to be applied to the convolution filter. Default: ``1`` .
        dilation (Union[int, tuple[int]]): Specifies the dilation rate to be used for the dilated convolution.
            Default: ``1`` .
        group (int): Splits input into groups. Default: ``1`` .
        data_format (str): The format of input and output data. It should be ``'NHWC'`` or ``'NCHW'`` .
            Default: ``'NCHW'`` .

    Inputs:
        - **dout** (Tensor) - The gradients with respect to the output of the convolution.
          The shape conforms to the default data_format :math:`(N, C_{out}, H_{out}, W_{out})`.
        - **weight** (Tensor) - If the size of the kernel is :math:`(K_1, K_2)`, then the shape is
          :math:`(C_{out}, C_{in}, K_1, K_2)`.
        - **input_size** (Tensor) - A tuple that describes the shape of the input, which conforms to the format
          :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor, the gradients with respect to the input of convolution. It has the same shape as the input.

    Raises:
        TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
        TypeError: If `out_channel` or `group` is not an int.
        ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `pad_mode` is not one of ``'same'``, ``'valid'`` or ``'pad'``.
        ValueError: If `padding` is a tuple whose length is not equal to 4.
2083 ValueError: If `pad_mode` it not equal to ``'pad'`` and `pad` is not equal to (0, 0, 0, 0). 2084 ValueError: If `data_format` is neither ``'NCHW'`` nor ``'NHWC'``. 2085 2086 Supported Platforms: 2087 ``Ascend`` ``GPU`` ``CPU`` 2088 2089 Examples: 2090 >>> import mindspore 2091 >>> import numpy as np 2092 >>> from mindspore import Tensor, ops 2093 >>> dout = Tensor(np.ones([10, 32, 30, 30]), mindspore.float32) 2094 >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32) 2095 >>> x = Tensor(np.ones([10, 32, 32, 32])) 2096 >>> conv2d_transpose_input = ops.Conv2DTranspose(out_channel=32, kernel_size=3) 2097 >>> output = conv2d_transpose_input(dout, weight, ops.shape(x)) 2098 >>> print(output.shape) 2099 (10, 32, 32, 32) 2100 """ 2101 2102 @prim_attr_register 2103 def __init__(self, out_channel, kernel_size, pad_mode="valid", pad=0, 2104 pad_list=None, mode=1, stride=1, dilation=1, group=1, data_format="NCHW"): 2105 """Initialize Conv2DTranspose.""" 2106 super(Conv2DTranspose, self).__init__(out_channel, kernel_size, pad_mode, pad, 2107 pad_list, mode, stride, dilation, group, data_format) 2108 2109 2110class SoftmaxCrossEntropyWithLogits(Primitive): 2111 r""" 2112 Gets the softmax cross-entropy value between logits and labels with one-hot encoding. 2113 2114 The updating formulas of SoftmaxCrossEntropyWithLogits algorithm are as follows, 2115 2116 .. math:: 2117 \begin{array}{ll} \\ 2118 p_{ij} = softmax(X_{ij}) = \frac{\exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)} \\ 2119 loss_{ij} = -\sum_j{Y_{ij} * ln(p_{ij})} 2120 \end{array} 2121 2122 where :math:`X` represents `logits`. 2123 :math:`Y` represents `label`. 2124 :math:`loss` represents `output`. 2125 2126 Inputs: 2127 - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32. 2128 - **labels** (Tensor) - Ground truth labels, with shape :math:`(N, C)`, has the same data type with `logits`. 2129 2130 Outputs: 2131 Tuple of 2 tensors(loss, dlogits), the `loss` shape is :math:`(N,)`, 2132 and the `dlogits` with the same shape as `logits`. 2133 2134 Raises: 2135 TypeError: If dtype of `logits` or `labels` is neither float16 nor float32. 2136 TypeError: If `logits` or `labels` is not a Tensor. 2137 ValueError: If shape of `logits` is not the same as `labels`. 2138 2139 Supported Platforms: 2140 ``Ascend`` ``GPU`` ``CPU`` 2141 2142 Examples: 2143 >>> import mindspore 2144 >>> from mindspore import Tensor, ops 2145 >>> logits = Tensor([[2, 4, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32) 2146 >>> labels = Tensor([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0]], mindspore.float32) 2147 >>> softmax_cross = ops.SoftmaxCrossEntropyWithLogits() 2148 >>> loss, dlogits = softmax_cross(logits, labels) 2149 >>> print(loss) 2150 [0.5899297 0.52374405] 2151 >>> print(dlogits) 2152 [[ 0.02760027 0.20393994 0.01015357 0.20393994 -0.44563377] 2153 [ 0.08015892 0.02948882 0.08015892 -0.4077012 0.21789455]] 2154 """ 2155 2156 @prim_attr_register 2157 def __init__(self): 2158 pass 2159 2160 2161class SparseSoftmaxCrossEntropyWithLogits(Primitive): 2162 r""" 2163 Computes the softmax cross-entropy value between logits and sparse encoding labels. 2164 2165 Sets input logits as `X`, input label as `Y`, output as `loss`. Then, 2166 2167 .. 
math:: 2168 \begin{array}{ll} \\ 2169 p_{ij} = softmax(X_{ij}) = \frac{\exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)} \\ 2170 loss_{ij} = \begin{cases} -ln(p_{ij}), &j = y_i \cr 0, & j \neq y_i \end{cases} \\ 2171 loss = \sum_{ij} loss_{ij} 2172 \end{array} 2173 2174 Args: 2175 is_grad (bool): If ``True`` , this operation returns the computed gradient. Default: ``False`` . 2176 2177 Inputs: 2178 - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32. 2179 - **labels** (Tensor) - Ground truth labels, with shape :math:`(N)`. 2180 Data type must be int32 or int64. 2181 2182 Outputs: 2183 Tensor, if `is_grad` is False, the output tensor is the value of loss which is a scalar tensor; 2184 if `is_grad` is ``True`` , the output tensor is the gradient of input with the same shape as `logits`. 2185 2186 Raises: 2187 TypeError: If `is_grad` is not a bool. 2188 TypeError: If dtype of `logits` is neither float16 nor float32. 2189 TypeError: If dtype of `labels` is neither int32 nor int64. 2190 ValueError: If :math:`logits.shape[0] != labels.shape[0]`. 2191 2192 Supported Platforms: 2193 ``GPU`` ``CPU`` 2194 2195 Examples: 2196 >>> import mindspore 2197 >>> from mindspore import Tensor, ops 2198 >>> logits = Tensor([[2, 3, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32) 2199 >>> labels = Tensor([0, 1], mindspore.int32) 2200 >>> sparse_softmax_cross = ops.SparseSoftmaxCrossEntropyWithLogits() 2201 >>> loss = sparse_softmax_cross(logits, labels) 2202 >>> print(loss) 2203 3.4878292 2204 >>> sparse_softmax_cross_grad = ops.SparseSoftmaxCrossEntropyWithLogits(is_grad=True) 2205 >>> loss_grad = sparse_softmax_cross_grad(logits, labels) 2206 >>> print(loss_grad) 2207 [[-0.48415753 0.04306427 0.00582811 0.11706084 0.3182043 ] 2208 [ 0.04007946 -0.4852556 0.04007946 0.2961494 0.10894729]] 2209 """ 2210 2211 @prim_attr_register 2212 def __init__(self, is_grad=False): 2213 """Initialize SparseSoftmaxCrossEntropyWithLogits.""" 2214 validator.check_value_type('is_grad', is_grad, [bool], self.name) 2215 self.init_prim_io_names(inputs=['features', 'labels'], outputs=['output']) 2216 self.is_grad = is_grad 2217 self.add_prim_attr('sens', 1.0) 2218 2219 2220class SparseSoftmaxCrossEntropyWithLogitsV2(Primitive): 2221 r""" 2222 Computes the softmax cross-entropy value between logits and sparse encoding labels. 2223 2224 Sets input logits as `X`, input label as `Y`, output as `loss`. Then, 2225 2226 .. math:: 2227 \begin{array}{ll} \\ 2228 p_{ij} = softmax(X_{ij}) = \frac{\exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)} \\ 2229 loss_{ij} = \begin{cases} -ln(p_{ij}), &j = y_i \cr 0, & j \neq y_i \end{cases} 2230 \end{array} 2231 2232 Inputs: 2233 - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32. 2234 - **labels** (Tensor) - Ground truth labels, with shape :math:`(N)`. 2235 Data type must be int32 or int64. 2236 2237 Outputs: 2238 - **loss** (Tensor) - With the same shape as `labels`, the same type as `logits`. 2239 - **backprop** (Tensor) - With the same shape and same type as `logits`. 2240 2241 Raises: 2242 TypeError: If dtype of `logits` is neither float16 nor float32. 2243 TypeError: If dtype of `labels` is neither int32 nor int64. 2244 ValueError: If logits.shape is not [batch x classes] or labels.shape is not [batch]. 
2245 2246 Supported Platforms: 2247 ``Ascend`` ``CPU`` 2248 2249 Examples: 2250 >>> logits = Tensor([[2, 3, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32) 2251 >>> labels = Tensor([0, 1], mindspore.int32) 2252 >>> sparse_softmax_cross = ops.SparseSoftmaxCrossEntropyWithLogitsV2() 2253 >>> loss, backprop = sparse_softmax_cross(logits, labels) 2254 >>> print(loss) 2255 [3.4519143 3.523744 ] 2256 >>> print(backprop) 2257 [[-0.96831506 0.08612854 0.01165623 0.23412165 0.6364086 ] 2258 [ 0.08015893 -0.9705112 0.08015893 0.5922988 0.21789455]] 2259 """ 2260 2261 @prim_attr_register 2262 def __init__(self): 2263 """Initialize SparseSoftmaxCrossEntropyWithLogitsV2.""" 2264 self.init_prim_io_names(inputs=['features', 'labels'], outputs=['loss', 'backprop']) 2265 2266 2267class ApplyMomentum(Primitive): 2268 r""" 2269 Optimizer that implements the Momentum algorithm. 2270 2271 Refer to the paper `On the importance of initialization and momentum in deep 2272 learning <https://dl.acm.org/doi/10.5555/3042817.3043064>`_ for more details. 2273 2274 Inputs of `variable`, `accumulation` and `gradient` comply with the implicit type conversion rules 2275 to make the data types consistent. 2276 If they have different data types, the lower priority data type will be converted to 2277 the relatively highest priority data type. 2278 2279 Refer to :class:`mindspore.nn.Momentum` for more details about the formula and usage. 2280 2281 Args: 2282 use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors 2283 from being updated. Default: ``False`` . 2284 use_nesterov (bool): Enable Nesterov momentum. Default: ``False`` . 2285 gradient_scale (float): The scale of the gradient. Default: ``1.0`` . 2286 2287 Inputs: 2288 - **variable** (Parameter) - Weights to be updated. Data type must be float64, int64, float, float16, 2289 int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128. 2290 - **accumulation** (Parameter) - Accumulated gradient value by moment weight, 2291 has the same data type with `variable`. 2292 - **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a float64, int64, float, 2293 float16, int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128 number or 2294 a scalar tensor with float64, int64, float, float16, int16, int32, int8, uint16, uint32, uint64, uint8, 2295 complex64, complex128 data type. 2296 - **gradient** (Tensor) - Gradient, has the same data type as `variable`. 2297 - **momentum** (Union[Number, Tensor]) - Momentum, must be a float64, int64, float, float16, int16, int32, 2298 int8, uint16, uint32, uint64, uint8, complex64, complex128 number or 2299 a scalar tensor with float64, int64, float, float16, int16, int32, int8, uint16, uint32, uint64, uint8, 2300 complex64, complex128 data type. 2301 2302 Outputs: 2303 Tensor, parameters to be updated. 2304 2305 Raises: 2306 TypeError: If the `use_locking` or `use_nesterov` is not a bool or `gradient_scale` is not a float. 2307 TypeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported. 2308 2309 Supported Platforms: 2310 ``Ascend`` ``GPU`` ``CPU`` 2311 2312 Examples: 2313 >>> import mindspore 2314 >>> import numpy as np 2315 >>> from mindspore import Tensor, nn, ops, Parameter 2316 >>> class Net(nn.Cell): 2317 ... def __init__(self): 2318 ... super(Net, self).__init__() 2319 ... self.apply_momentum = ops.ApplyMomentum() 2320 ... self.variable = Parameter(Tensor(np.array([[0.6, 0.4], 2321 ... 
[0.1, 0.5]]).astype(np.float32)), name="variable") 2322 ... self.accumulate = Parameter(Tensor(np.array([[0.6, 0.5], 2323 ... [0.2, 0.6]]).astype(np.float32)), name="accumulate") 2324 ... def construct(self, lr, grad, moment): 2325 ... out = self.apply_momentum(self.variable, self.accumulate, lr, grad, moment) 2326 ... return out 2327 >>> net = Net() 2328 >>> lr = Tensor(0.1, mindspore.float32) 2329 >>> moment = Tensor(0.9, mindspore.float32) 2330 >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32)) 2331 >>> output = net(lr, grad, moment) 2332 >>> print(output) 2333 [[0.51600003 0.285 ] 2334 [0.072 0.366 ]] 2335 """ 2336 __mindspore_signature__ = ( 2337 sig.make_sig('variable', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 2338 sig.make_sig('accumulation', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 2339 sig.make_sig('learning_rate', dtype=sig.sig_dtype.T1), 2340 sig.make_sig('gradient', dtype=sig.sig_dtype.T), 2341 sig.make_sig('momentum', dtype=sig.sig_dtype.T2) 2342 ) 2343 2344 @prim_attr_register 2345 def __init__(self, use_nesterov=False, use_locking=False, gradient_scale=1.0): 2346 """Initialize ApplyMomentum.""" 2347 self.use_nesterov = validator.check_bool(use_nesterov, "use_nesterov", self.name) 2348 self.use_locking = validator.check_bool(use_locking, "use_locking", self.name) 2349 validator.check_value_type('gradient_scale', gradient_scale, [float], self.name) 2350 self.init_prim_io_names(inputs=['variable', 'accumulation', 'learning_rate', 'gradient', 'momentum'], 2351 outputs=['output']) 2352 self.add_prim_attr('side_effect_mem', True) 2353 2354 2355class SmoothL1Loss(Primitive): 2356 r""" 2357 Calculate the smooth L1 loss, and the L1 loss function has robustness. 2358 2359 Refer to :func:`mindspore.ops.smooth_l1_loss` for more details. 2360 2361 Args: 2362 beta (float, optional): A parameter used to control the point where the function will change between 2363 L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` . 2364 reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , 2365 ``'sum'`` . Default: ``'none'`` . 2366 2367 - ``'none'``: no reduction will be applied. 2368 - ``'mean'``: compute and return the mean of elements in the output. 2369 - ``'sum'``: the output elements will be summed. 2370 2371 Inputs: 2372 - **logits** (Tensor) - Input Tensor of any dimension. Data type must be float16, float32 or float64. 2373 - **labels** (Tensor) - Ground truth data, has the same shape and dtype as the `logits`. 2374 2375 Outputs: 2376 Tensor, loss float tensor, same shape and dtype as the `logits`. 2377 2378 Supported Platforms: 2379 ``Ascend`` ``GPU`` ``CPU`` 2380 2381 Examples: 2382 >>> import mindspore 2383 >>> import numpy as np 2384 >>> from mindspore import Tensor, ops 2385 >>> loss = ops.SmoothL1Loss() 2386 >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32) 2387 >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32) 2388 >>> output = loss(logits, labels) 2389 >>> print(output) 2390 [0. 0. 
0.5] 2391 """ 2392 2393 @prim_attr_register 2394 def __init__(self, beta=1.0, reduction='none'): 2395 """Initialize SmoothL1Loss.""" 2396 validator.check_value_type('beta', beta, [float], self.name) 2397 validator.check('beta', beta, '', 0, validator.GT, self.name) 2398 validator.check_string( 2399 reduction, ['none', 'sum', 'mean'], 'reduction', self.name) 2400 self.add_prim_attr('sigma', self.beta) 2401 self.init_prim_io_names(inputs=['prediction', 'target'], outputs=['output']) 2402 2403 2404class MultiMarginLoss(Primitive): 2405 r""" 2406 Creates a loss function that minimizes the hinge loss 2407 for multi-class classification tasks. 2408 The loss is calculated by comparing the input and output of the function. 2409 2410 .. warning:: 2411 This is an experimental API that is subject to change or deletion. 2412 2413 Refer to :func:`mindspore.ops.multi_margin_loss` for more details. 2414 2415 Args: 2416 p (int, optional): The norm degree for pairwise distance. Should be 1 or 2. Default: ``1`` . 2417 margin (int, optional): A parameter to change pairwise distance. Default: ``1.0`` . 2418 reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , 2419 ``'sum'`` . Default: ``'mean'`` . 2420 2421 - ``'none'``: no reduction will be applied. 2422 - ``'mean'``: compute and return the weighted mean of elements in the output. 2423 - ``'sum'``: the output elements will be summed. 2424 2425 Inputs: 2426 - **inputs** (Tensor) - Input , with shape :math:`(N, C)`. Data type only support float32, float16 2427 or float64. 2428 - **target** (Tensor) - Ground truth labels, with shape :math:`(N,)`. Data type only support int64. The 2429 value of target should be non-negative, less than C. 2430 - **weight** (Tensor, optional) - The rescaling weight to each class with shape :math:`(C,)`. Data type only 2431 support float16, float32 or float64. 2432 2433 Outputs: 2434 Tensor, When `reduction` is ``'none'``, the shape is :math:`(N,)`. 2435 Otherwise, it is a scalar. Has the same data type with `inputs`. 2436 2437 Supported Platforms: 2438 ``Ascend`` ``GPU`` ``CPU`` 2439 2440 Examples: 2441 >>> import mindspore 2442 >>> import numpy as np 2443 >>> from mindspore import Tensor, ops 2444 >>> x = Tensor(np.ones(shape=[3, 3]), mindspore.float32) 2445 >>> target = Tensor(np.array([1, 2, 1]), mindspore.int64) 2446 >>> weight = Tensor(np.array([1, 1, 1]), mindspore.float32) 2447 >>> loss = ops.MultiMarginLoss() 2448 >>> output = loss(x, target, weight) 2449 >>> print(output) 2450 0.6666667 2451 """ 2452 __mindspore_signature__ = ( 2453 sig.make_sig('x'), 2454 sig.make_sig('target'), 2455 sig.make_sig('weight', default=None) 2456 ) 2457 2458 @prim_attr_register 2459 def __init__(self, p=1, margin=1.0, reduction="mean"): 2460 """Initialize MultiMarginLoss""" 2461 self.p = validator.check_value_type('p', p, [int], self.name) 2462 validator.check_int(p, {1, 2}, validator.IN, 'p', self.name) 2463 self.margin = validator.check_value_type('margin', margin, [float], self.name) 2464 self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name) 2465 self.init_prim_io_names(inputs=['x', 'target', 'weight'], outputs=['y']) 2466 2467 def __call__(self, x, target, weight=None): 2468 return super().__call__(x, target, weight) 2469 2470 2471class SoftMarginLoss(Primitive): 2472 r""" 2473 SoftMarginLoss operation. 
2474 2475 Creates a criterion that optimizes a two-class classification 2476 logistic loss between input tensor :math:`x` and target tensor :math:`y` 2477 (containing 1 or -1). 2478 2479 .. math:: 2480 \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()} 2481 2482 where :math:`x.nelement()` is the number of elements of x. 2483 2484 Args: 2485 reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , 2486 ``'sum'`` . Default: ``'mean'`` . 2487 2488 - ``'none'``: no reduction will be applied. 2489 - ``'mean'``: compute and return the mean of elements in the output. 2490 - ``'sum'``: the output elements will be summed. 2491 2492 Inputs: 2493 - **logits** (Tensor) - Predict data. Data type must be float16 or float32. 2494 - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`. 2495 2496 Outputs: 2497 Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `logits`. 2498 Otherwise, a scalar value will be returned. 2499 2500 Raises: 2501 TypeError: If `logits` or `labels` is not a Tensor. 2502 TypeError: If dtype of `logits` or `labels` is neither float16 nor float32. 2503 ValueError: If shape of `logits` is not the same as `labels`. 2504 ValueError: If `reduction` is not one of ``"none"`` , ``"mean"`` or ``"sum"`` . 2505 2506 Supported Platforms: 2507 ``Ascend`` ``GPU`` 2508 2509 Examples: 2510 >>> import mindspore 2511 >>> import numpy as np 2512 >>> from mindspore import Tensor, ops 2513 >>> loss = ops.SoftMarginLoss() 2514 >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32) 2515 >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32) 2516 >>> output = loss(logits, labels) 2517 >>> print(output) 2518 0.6764238 2519 """ 2520 2521 @prim_attr_register 2522 def __init__(self, reduction="mean"): 2523 """Initialize SoftMarginLoss""" 2524 self.init_prim_io_names(inputs=['predict', 'label'], outputs=['loss']) 2525 self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name) 2526 2527 2528class L2Loss(Primitive): 2529 r""" 2530 Calculates half of the L2 norm, but do not square the result. 2531 2532 Set input as x and output as loss. 2533 2534 .. math:: 2535 loss = \frac{\sum x ^ 2}{2} 2536 2537 Inputs: 2538 - **input_x** (Tensor) - Tensor for computing the L2 norm. Data type must be float16, float32 or float64. 2539 2540 Outputs: 2541 Tensor, has a Scalar Tensor with the same data type as `input_x`. 2542 2543 Raises: 2544 TypeError: If `input_x` is not a Tensor. 2545 TypeError: If dtype of `input_x` is not float16, float32 or float64. 2546 2547 Supported Platforms: 2548 ``Ascend`` ``GPU`` ``CPU`` 2549 2550 Examples: 2551 >>> import mindspore 2552 >>> import numpy as np 2553 >>> from mindspore import Tensor, ops 2554 >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.float16) 2555 >>> l2_loss = ops.L2Loss() 2556 >>> output = l2_loss(input_x) 2557 >>> print(output) 2558 7.0 2559 """ 2560 2561 @prim_attr_register 2562 def __init__(self): 2563 """Initialize L2Loss""" 2564 2565 2566class DataFormatDimMap(Primitive): 2567 """ 2568 Returns the dimension index in the destination data format given in the source data format. 2569 2570 Args: 2571 src_format (str): An optional value for source data format. The format can be ``'NHWC'`` and ``'NCHW'`` . 2572 Default: ``'NHWC'`` . 2573 dst_format (str): An optional value for destination data format. The format can be ``'NHWC'`` and ``'NCHW'`` . 2574 Default: ``'NCHW'`` . 
2575 2576 Inputs: 2577 - **input_x** (Tensor) - A Tensor, each element is used as a dimension index of the source data format. 2578 The suggested values are in the range [-4, 4). Only supports int32. 2579 2580 Outputs: 2581 Tensor, Return the dimension index in the given target data format, 2582 has the same data type and shape as the `input_x`. 2583 2584 Raises: 2585 TypeError: If `src_format` or `dst_format` is not a str. 2586 TypeError: If `input_x` is not a Tensor whose dtype is not int32. 2587 2588 Supported Platforms: 2589 ``Ascend`` ``GPU`` ``CPU`` 2590 2591 Examples: 2592 >>> import mindspore 2593 >>> from mindspore import Tensor, ops 2594 >>> input_x = Tensor([0, 1, 2, 3], mindspore.int32) 2595 >>> dfdm = ops.DataFormatDimMap() 2596 >>> output = dfdm(input_x) 2597 >>> print(output) 2598 [0 3 1 2] 2599 """ 2600 2601 @prim_attr_register 2602 def __init__(self, src_format='NHWC', dst_format='NCHW'): 2603 """Initialize DataFormatDimMap.""" 2604 valid_values = ['NHWC', 'NCHW'] 2605 self.src_format = validator.check_string(src_format, valid_values, "src_format", self.name) 2606 self.dst_format = validator.check_string(dst_format, valid_values, "dst_format", self.name) 2607 self.init_prim_io_names(inputs=['input_x'], outputs=['output']) 2608 2609 2610class RNNTLoss(PrimitiveWithInfer): 2611 """ 2612 Computes the RNNTLoss and its gradient with respect to the softmax outputs. 2613 2614 Args: 2615 blank_label (int): blank label. Default: ``0`` . 2616 2617 Inputs: 2618 - **acts** (Tensor) - Tensor of shape :math:`(B, T, U, V)`, where :math:`B` is batch, 2619 :math:`T` is sequence length, :math:`U` is label length and :math:`V` is output dim. 2620 Data type must be float16 or float32. 2621 - **labels** (Tensor) - Tensor of shape :math:`(B, U-1)`. Data type is int32. 2622 - **input_lengths** (Tensor) - Tensor of shape :math:`(B,)`. Data type is int32. 2623 - **label_lengths** (Tensor) - Tensor of shape :math:`(B,)`. Data type is int32. 2624 2625 Outputs: 2626 - **costs** (Tensor) - Tensor of shape :math:`(B,)`. Data type is int32. 2627 - **grads** (Tensor) - Has the same shape and dtype as `acts`. 2628 2629 Raises: 2630 TypeError: If `acts`, `labels`, `input_lengths` or `label_lengths` is not a Tensor. 2631 TypeError: If dtype of `acts` is neither float16 nor float32. 2632 TypeError: If dtype of `labels`, `input_lengths` or `label_lengths` is not int32. 
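
    As a quick check of the shape relationships above, using the same toy sizes as the example below
    (the names here are only illustrative):

    >>> B, T, U, V = 1, 2, 3, 5
    >>> acts_shape = (B, T, U, V)      # (1, 2, 3, 5)
    >>> labels_shape = (B, U - 1)      # (1, 2)
    >>> costs_shape, grads_shape = (B,), acts_shape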
2633 2634 Supported Platforms: 2635 ``Ascend`` 2636 2637 Examples: 2638 >>> import numpy as np 2639 >>> from mindspore import ops, Tensor 2640 >>> B, T, U, V = 1, 2, 3, 5 2641 >>> blank = 0 2642 >>> acts = np.random.random((B, T, U, V)).astype(np.float32) 2643 >>> labels = np.array([[1, 2]]).astype(np.int32) 2644 >>> input_length = np.array([T] * B).astype(np.int32) 2645 >>> label_length = np.array([len(l) for l in labels]).astype(np.int32) 2646 >>> rnnt_loss = ops.RNNTLoss(blank_label=0) 2647 >>> costs, grads = rnnt_loss(Tensor(acts), Tensor(labels), Tensor(input_length), Tensor(label_length)) 2648 >>> print(costs.shape) 2649 (1,) 2650 >>> print(grads.shape) 2651 (1, 2, 3, 5) 2652 """ 2653 2654 @prim_attr_register 2655 def __init__(self, blank_label=0): 2656 """Initialize RNNTLoss.""" 2657 validator.check_value_type('blank_label', blank_label, [int], self.name) 2658 self.init_prim_io_names(inputs=['acts', 'labels', 'input_length', 'label_length'], 2659 outputs=['costs', 'grads']) 2660 2661 def infer_shape(self, acts_shape, labels_shape, input_length_shape, label_length_shape): 2662 validator.check_equal_int(len(acts_shape), 4, 'acts_rank', self.name) 2663 validator.check_equal_int(len(labels_shape), 2, 'labels_rank', self.name) 2664 validator.check_equal_int(len(input_length_shape), 1, 'input_length_rank', self.name) 2665 validator.check_equal_int(len(label_length_shape), 1, 'label_length_rank', self.name) 2666 validator.check('labels shape[0]', labels_shape[0], 'acts shape[0]', acts_shape[0], validator.EQ, self.name) 2667 validator.check('labels shape[1]', labels_shape[1], 'acts shape[2]-1', 2668 acts_shape[2] - 1, validator.EQ, self.name) 2669 validator.check('input_length size', input_length_shape[0], 'acts shape[0]', 2670 acts_shape[0], validator.EQ, self.name) 2671 validator.check('label_length size', label_length_shape[0], 'acts shape[0]', 2672 acts_shape[0], validator.EQ, self.name) 2673 costs_shape = (acts_shape[0],) 2674 return costs_shape, acts_shape 2675 2676 def infer_dtype(self, acts_type, labels_type, input_length_type, label_length_type): 2677 validator.check_tensor_dtype_valid("acts_type", acts_type, [mstype.float32, mstype.float16], self.name) 2678 tuple(map(partial(validator.check_tensor_dtype_valid, 2679 valid_dtypes=(mstype.int32,), prim_name=self.name), 2680 ("labels", "input_length", "label_length"), 2681 (labels_type, input_length_type, label_length_type))) 2682 return acts_type, acts_type 2683 2684 2685class SGD(PrimitiveWithCheck): 2686 """ 2687 Computes the stochastic gradient descent. Momentum is optional. 2688 2689 Nesterov momentum is based on the formula from paper `On the importance of 2690 initialization and momentum in deep learning <http://proceedings.mlr.press/v28/sutskever13.html>`_. 2691 2692 Note: 2693 If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without 2694 'beta' or 'gamma' in their names. Users can group parameters to change the strategy of decaying weight. When 2695 parameters are grouped, each group can set `weight_decay`. If not, the `weight_decay` in optimizer will be 2696 applied. 2697 For more details, please refer to :class:`mindspore.nn.SGD`. 2698 2699 Args: 2700 dampening (float): The dampening for momentum. Default: ``0.0`` . 2701 weight_decay (float): Weight decay (L2 penalty). Default: ``0.0`` . 2702 nesterov (bool): Enable Nesterov momentum. Default: ``False`` . 2703 2704 Inputs: 2705 - **parameters** (Tensor) - Parameters to be updated. With float16 or float32 data type. 
2706 - **gradient** (Tensor) - Gradient, with float16 or float32 data type. 2707 - **learning_rate** (Tensor) - Learning rate, a scalar tensor with float16 or float32 data type. 2708 e.g. Tensor(0.1, mindspore.float32) 2709 - **accum** (Tensor) - Accum(velocity) to be updated. With float16 or float32 data type. 2710 - **momentum** (Tensor) - Momentum, a scalar tensor with float16 or float32 data type. 2711 e.g. Tensor(0.1, mindspore.float32). 2712 - **stat** (Tensor) - States to be updated with the same shape as gradient, with float16 or float32 data type. 2713 2714 Outputs: 2715 Tensor, parameters to be updated. 2716 2717 Raises: 2718 TypeError: If `dampening` or `weight_decay` is not a float. 2719 TypeError: If `nesterov` is not a bool. 2720 TypeError: If `parameters`, `gradient`, `learning_rate`, `accum`, `momentum` or `stat` is not a Tensor. 2721 TypeError: If dtype of `parameters`, `gradient`, `learning_rate`, `accum`, `momentum` or `stat` is neither 2722 float16 nor float32. 2723 2724 Supported Platforms: 2725 ``Ascend`` ``GPU`` ``CPU`` 2726 2727 Examples: 2728 >>> import mindspore 2729 >>> import numpy as np 2730 >>> from mindspore import Tensor, ops 2731 >>> sgd = ops.SGD() 2732 >>> parameters = Tensor(np.array([2, -0.5, 1.7, 4]), mindspore.float32) 2733 >>> gradient = Tensor(np.array([1, -1, 0.5, 2]), mindspore.float32) 2734 >>> learning_rate = Tensor(0.01, mindspore.float32) 2735 >>> accum = Tensor(np.array([0.1, 0.3, -0.2, -0.1]), mindspore.float32) 2736 >>> momentum = Tensor(0.1, mindspore.float32) 2737 >>> stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mindspore.float32) 2738 >>> output = sgd(parameters, gradient, learning_rate, accum, momentum, stat) 2739 >>> print(output.asnumpy()) 2740 [1.99 -0.4903 1.695 3.9801] 2741 """ 2742 2743 @prim_attr_register 2744 def __init__(self, dampening=0.0, weight_decay=0.0, nesterov=False): 2745 """Initialize SGD.""" 2746 validator.check_value_type("nesterov", nesterov, [bool], self.name) 2747 if nesterov and dampening != 0: 2748 raise ValueError(f"For '{self.name}', the 'dampening' must be 0 when 'nesterov' is True, " 2749 f"but got 'dampening' is {dampening} and 'nesterov' is {nesterov}.") 2750 self.init_prim_io_names(inputs=['parameters', 'gradient', 'learning_rate', 'accum', 'momentum', 'stat'], 2751 outputs=['output']) 2752 self.add_prim_attr('side_effect_mem', True) 2753 2754 def check_shape(self, parameters_shape, gradient_shape, learning_rate_shape, 2755 accum_shape, momentum_shape, stat_shape): 2756 validator.check_int(len(gradient_shape), 0, validator.GE, f'gradient rank', self.name) 2757 validator.check_int(len(learning_rate_shape), 0, validator.GE, f'learning rate rank', self.name) 2758 validator.check_int(len(momentum_shape), 0, validator.GE, f'momentum rank', self.name) 2759 validator.check_int(len(stat_shape), 0, validator.GE, f'stat rank', self.name) 2760 2761 def check_dtype(self, parameters_dtype, gradient_dtype, learning_rate_dtype, 2762 accum_dtype, momentum_dtype, stat_dtype): 2763 tuple(map(partial(validator.check_tensor_dtype_valid, 2764 valid_dtypes=(mstype.float16, mstype.float32), prim_name=self.name), 2765 ("parameters", "gradient", "learning_rate", "accum", "momentum", "stat"), 2766 (parameters_dtype, gradient_dtype, learning_rate_dtype, accum_dtype, momentum_dtype, stat_dtype))) 2767 2768 2769class ApplyRMSProp(PrimitiveWithInfer): 2770 r""" 2771 Optimizer that implements the Root Mean Square prop(RMSProp) algorithm. 2772 Please refer to the usage in source code of :class:`mindspore.nn.RMSProp`. 
2773 2774 The updating formulas of ApplyRMSProp algorithm are as follows, 2775 2776 .. math:: 2777 \begin{array}{ll} \\ 2778 s_{t+1} = \rho s_{t} + (1 - \rho)(\nabla Q_{i}(w))^2 \\ 2779 m_{t+1} = \beta m_{t} + \frac{\eta} {\sqrt{s_{t+1} + \epsilon}} \nabla Q_{i}(w) \\ 2780 w = w - m_{t+1} 2781 \end{array} 2782 2783 where :math:`w` represents `var`, which will be updated. 2784 :math:`s_{t+1}` represents `mean_square`, :math:`s_{t}` is the last moment of :math:`s_{t+1}`, 2785 :math:`m_{t+1}` represents `moment`, :math:`m_{t}` is the last moment of :math:`m_{t+1}`. 2786 :math:`\rho` represents `decay`. :math:`\beta` is the momentum term, represents `momentum`. 2787 :math:`\epsilon` is a smoothing term to avoid division by zero, represents `epsilon`. 2788 :math:`\eta` represents `learning_rate`. :math:`\nabla Q_{i}(w)` represents `grad`. 2789 2790 .. warning:: 2791 Note that in dense implementation of this algorithm, "mean_square" and "moment" will update even if "grad" is 0, 2792 but in this sparse implementation, "mean_square" and "moment" will not update 2793 in iterations during which "grad" is 0. 2794 2795 Args: 2796 use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors 2797 from being updated. Default: ``False`` . 2798 2799 Inputs: 2800 - **var** (Parameter) - Weights to be updated. 2801 - **mean_square** (Tensor) - Mean square gradients, must be the same type as `var`. 2802 - **moment** (Tensor) - Delta of `var`, must be the same type as `var`. 2803 - **learning_rate** (Union[Number, Tensor]) - Learning rate. Must be a float number or 2804 a scalar tensor with float16 or float32 data type. 2805 - **grad** (Tensor) - Gradient, must be the same type as `var`. 2806 - **decay** (float) - Decay rate. Only constant value is allowed. 2807 - **momentum** (float) - Momentum. Only constant value is allowed. 2808 - **epsilon** (float) - Ridge term. Only constant value is allowed. 2809 2810 Outputs: 2811 Tensor, parameters to be updated. 2812 2813 Raises: 2814 TypeError: If `use_locking` is not a bool. 2815 TypeError: If `var`, `mean_square`, `moment` or `decay` is not a Tensor. 2816 TypeError: If `learning_rate` is neither a Number nor a Tensor. 2817 TypeError: If dtype of `decay`, `momentum` or `epsilon` is not float. 2818 TypeError: If dtype of `learning_rate` is neither float16 nor float32. 2819 ValueError: If `decay`, `momentum` or `epsilon` is not a constant value. 2820 2821 Supported Platforms: 2822 ``Ascend`` ``GPU`` ``CPU`` 2823 2824 Examples: 2825 >>> import numpy as np 2826 >>> from mindspore import Tensor, nn, ops, Parameter 2827 >>> class Net(nn.Cell): 2828 ... def __init__(self): 2829 ... super(Net, self).__init__() 2830 ... self.apply_rms_prop = ops.ApplyRMSProp() 2831 ... self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var") 2832 ... 2833 ... def construct(self, mean_square, moment, grad, decay, momentum, epsilon, lr): 2834 ... out = self.apply_rms_prop(self.var, mean_square, moment, lr, grad, decay, momentum, epsilon) 2835 ... return out 2836 ... 
        >>> net = Net()
        >>> mean_square = Tensor(np.ones([2, 2]).astype(np.float32))
        >>> moment = Tensor(np.ones([2, 2]).astype(np.float32))
        >>> grad = Tensor(np.ones([2, 2]).astype(np.float32))
        >>> output = net(mean_square, moment, grad, 0.0, 1e-10, 0.001, 0.01)
        >>> print(net.var.asnumpy())
        [[0.990005 0.990005]
         [0.990005 0.990005]]
    """

    @prim_attr_register
    def __init__(self, use_locking=False):
        """Initialize ApplyRMSProp."""
        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
        self.init_prim_io_names(inputs=['var', 'mean_square', 'moment', 'learning_rate', 'grad',
                                        'rho', 'momentum', 'epsilon'], outputs=['output'])
        self.add_prim_attr('side_effect_mem', True)


class ApplyCenteredRMSProp(Primitive):
    r"""
    Optimizer that implements the centered RMSProp algorithm.
    Please refer to the usage in source code of :class:`mindspore.nn.RMSProp`.

    The updating formulas of ApplyCenteredRMSProp algorithm are as follows,

    .. math::
        \begin{array}{ll} \\
            g_{t+1} = \rho g_{t} + (1 - \rho)\nabla Q_{i}(w) \\
            s_{t+1} = \rho s_{t} + (1 - \rho)(\nabla Q_{i}(w))^2 \\
            m_{t+1} = \beta m_{t} + \frac{\eta} {\sqrt{s_{t+1} - g_{t+1}^2 + \epsilon}} \nabla Q_{i}(w) \\
            w = w - m_{t+1}
        \end{array}

    where :math:`w` represents `var`, which will be updated.
    :math:`g_{t+1}` represents `mean_gradient`, :math:`g_{t}` is the last moment of :math:`g_{t+1}`.
    :math:`s_{t+1}` represents `mean_square`, :math:`s_{t}` is the last moment of :math:`s_{t+1}`,
    :math:`m_{t+1}` represents `moment`, :math:`m_{t}` is the last moment of :math:`m_{t+1}`.
    :math:`\rho` represents `decay`. :math:`\beta` is the momentum term, represents `momentum`.
    :math:`\epsilon` is a smoothing term to avoid division by zero, represents `epsilon`.
    :math:`\eta` represents `learning_rate`. :math:`\nabla Q_{i}(w)` represents `grad`.

    Note:
        The difference between `ApplyCenteredRMSProp` and `ApplyRMSProp` is that the former uses the centered
        RMSProp algorithm, which normalizes by an estimate of the centered second moment (i.e., the variance),
        as opposed to regular RMSProp, which uses the (uncentered) second moment. This often helps with training,
        but is slightly more expensive in terms of computation and memory.

    .. warning::
        In the dense implementation of this algorithm, `mean_gradient`, `mean_square`, and `moment` will update
        even if the `grad` is zero. But in the sparse implementation, `mean_gradient`, `mean_square`, and `moment`
        will not update in iterations during which the `grad` is zero.

    Args:
        use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
            from being updated. Default: ``False`` .

    Inputs:
        - **var** (Parameter) - Weights to be updated.
        - **mean_gradient** (Tensor) - Mean gradients, must be the same type as `var`.
        - **mean_square** (Tensor) - Mean square gradients, must be the same type as `var`.
        - **moment** (Tensor) - Delta of `var`, must be the same type as `var`.
        - **grad** (Tensor) - Gradient, must be the same type as `var`.
        - **learning_rate** (Union[Number, Tensor]) - Learning rate. Must be a float number or
          a scalar tensor with float16 or float32 data type.
        - **decay** (float) - Decay rate.
2904 - **momentum** (float) - Momentum. 2905 - **epsilon** (float) - Ridge term. 2906 2907 Outputs: 2908 Tensor, parameters to be updated. 2909 2910 Raises: 2911 TypeError: If `use_locking` is not a bool. 2912 TypeError: If `var`, `mean_gradient`, `mean_square`, `moment` or `grad` is not a Tensor. 2913 TypeError: If `learing_rate` is neither a Number nor a Tensor. 2914 TypeError: If dtype of `learing_rate` is neither float16 nor float32. 2915 TypeError: If `decay`, `momentum` or `epsilon` is not a float. 2916 2917 Supported Platforms: 2918 ``Ascend`` ``GPU`` ``CPU`` 2919 2920 Examples: 2921 >>> import numpy as np 2922 >>> from mindspore import Tensor, nn, ops, Parameter 2923 >>> class Net(nn.Cell): 2924 ... def __init__(self): 2925 ... super(Net, self).__init__() 2926 ... self.apply_centerd_rms_prop = ops.ApplyCenteredRMSProp() 2927 ... self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var") 2928 ... 2929 ... def construct(self, mean_grad, mean_square, moment, grad, decay, momentum, epsilon, lr): 2930 ... out = self.apply_centerd_rms_prop(self.var, mean_grad, mean_square, moment, grad, 2931 ... lr, decay, momentum, epsilon) 2932 ... return out 2933 ... 2934 >>> net = Net() 2935 >>> mean_grad = Tensor(np.ones([2, 2]).astype(np.float32)) 2936 >>> mean_square = Tensor(np.ones([2, 2]).astype(np.float32)) 2937 >>> moment = Tensor(np.ones([2, 2]).astype(np.float32)) 2938 >>> grad = Tensor(np.ones([2, 2]).astype(np.float32)) 2939 >>> output = net(mean_grad, mean_square, moment, grad, 0.0, 1e-10, 0.001, 0.01) 2940 >>> print(net.var.asnumpy()) 2941 [[0.68377227 0.68377227] 2942 [0.68377227 0.68377227]] 2943 """ 2944 2945 @prim_attr_register 2946 def __init__(self, use_locking=False): 2947 """Initialize ApplyCenteredRMSProp.""" 2948 self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) 2949 self.add_prim_attr('side_effect_mem', True) 2950 2951 2952class L2Normalize(Primitive): 2953 r""" 2954 L2 Normalization Operator. 2955 2956 This operator will normalize the input using the given axis. The function is shown as follows: 2957 2958 .. math:: 2959 \displaylines{{\text{output} = \frac{x}{\sqrt{\text{max}( \sum_{i}^{}\left | x_i \right | ^2, \epsilon)}}}} 2960 2961 where :math:`\epsilon` is epsilon and :math:`\sum_{i}^{}\left | x_i \right | ^2` calculate the sum of squares of 2962 the input `x` along the dimension `axis`. 2963 2964 Note: 2965 On Ascend, input data type of float64 is currently not supported. 2966 2967 Args: 2968 axis (Union[list(int), tuple(int), int], optional): Specify the axis for calculating the L2 norm. 2969 Default: ``0`` . 2970 epsilon (float, optional): A small value added for numerical stability. Default: ``1e-4`` . 2971 2972 Inputs: 2973 - **x** (Tensor) - Input to compute the normalization. Tensor of shape :math:`(N, *)`, 2974 where :math:`*` means any number of additional dimensions. 2975 Data type must be float16, float32 or float64. 2976 2977 Outputs: 2978 Tensor, with the same type and shape as the `x`. 2979 2980 Raises: 2981 TypeError: If `axis` is not one of the following: list, tuple or int. 2982 TypeError: If `epsilon` is not a float. 2983 TypeError: If `x` is not a Tensor. 2984 TypeError: If dtype of `x` is not in [float16, float32, float64]. 2985 ValueError: If dimension of `x` is not greater than 0. 
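
    A hand-worked instance of the formula above, sketched in NumPy for a 1-D input normalized along ``axis=0``
    (the operator itself should be used as in the example below):

    >>> import numpy as np
    >>> x = np.array([3.0, 4.0])
    >>> x / np.sqrt(np.maximum(np.sum(np.abs(x) ** 2), 1e-4))
    array([0.6, 0.8])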
2986 2987 Supported Platforms: 2988 ``Ascend`` ``GPU`` ``CPU`` 2989 2990 Examples: 2991 >>> import mindspore 2992 >>> import numpy as np 2993 >>> from mindspore import Tensor, ops 2994 >>> l2_normalize = ops.L2Normalize() 2995 >>> x = Tensor(np.random.randint(-256, 256, (2, 3, 4)), mindspore.float32) 2996 >>> output = l2_normalize(x) 2997 >>> print(output.shape) 2998 (2, 3, 4) 2999 """ 3000 3001 @prim_attr_register 3002 def __init__(self, axis=0, epsilon=1e-4): 3003 """Initialize L2Normalize.""" 3004 axis = [axis] if isinstance(axis, int) else axis 3005 validator.check_value_type('axis', axis, [list, tuple], self.name) 3006 validator.check_value_type('epsilon', epsilon, [int, float], self.name) 3007 self.add_prim_attr('axis', axis) 3008 self.init_attrs['axis'] = axis 3009 if len(axis) != 1: 3010 raise TypeError(f"For '{self.name}', the length of 'axis' must be 1, but got {len(axis)}, " 3011 f"later will support multiple axis!") 3012 self.axis = axis 3013 3014 3015class GetNext(Primitive): 3016 """ 3017 Returns the next element in the dataset queue. 3018 3019 Note: 3020 The GetNext operation needs to be associated with network and it also depends 3021 on the 'dataset' interface, For example, please refer to :class:`mindspore.dataset.MnistDataset` . 3022 it can't be used directly as a single operation. 3023 For details, please refer to :class:`mindspore.connect_network_with_dataset` source code. 3024 3025 Args: 3026 types (list[:class:`mindspore.dtype`]): The type of the outputs. 3027 shapes (list[tuple[int]]): The dimensionality of the outputs. 3028 output_num (int): The output number, length of `types` and `shapes`. 3029 shared_name (str): Queue name to fetch the data. 3030 3031 Inputs: 3032 No inputs. 3033 3034 Outputs: 3035 tuple[Tensor], the output of dataset. The shape is described in `shapes` 3036 and the type is described in `types`. 3037 3038 Supported Platforms: 3039 ``Ascend`` ``GPU`` 3040 3041 Examples: 3042 >>> import mindspore 3043 >>> from mindspore import ops 3044 >>> from mindspore import dataset as ds 3045 >>> from mindspore import dtype as mstype 3046 >>> data_path = "/path/to/MNIST_Data/train/" 3047 >>> train_dataset = ds.MnistDataset(data_path, num_samples=10) 3048 >>> dataset_helper = mindspore.DatasetHelper(train_dataset, dataset_sink_mode=True) 3049 >>> dataset = dataset_helper.iter.dataset 3050 >>> dataset_types, dataset_shapes = dataset_helper.types_shapes() 3051 >>> queue_name = dataset.__transfer_dataset__.queue_name 3052 >>> get_next = ops.GetNext(dataset_types, dataset_shapes, len(dataset_types), queue_name) 3053 >>> data, label = get_next() 3054 >>> relu = ops.ReLU() 3055 >>> result = relu(data.astype(mstype.float32)) 3056 >>> print(result.shape) 3057 (28, 28, 1) 3058 """ 3059 3060 @prim_attr_register 3061 def __init__(self, types, shapes, output_num, shared_name): 3062 """Initialize GetNext.""" 3063 validator.check_value_type("types", types, [list, tuple], self.name) 3064 validator.check_value_type("shapes", shapes, [list, tuple], self.name) 3065 validator.check("types length", len(types), "shapes length", len(shapes), validator.EQ, self.name) 3066 validator.check_value_type("output_num", output_num, [int], self.name) 3067 3068 3069class LSTM(Primitive): 3070 r""" 3071 Performs the Long Short-Term Memory (LSTM) on the input. 3072 3073 For more information, please refer to :class:`mindspore.nn.LSTM`. 3074 3075 Args: 3076 input_size (int): Number of features of input. 3077 hidden_size (int): Number of features of hidden layer. 
3078 num_layers (int): Number of layers of stacked LSTM. 3079 has_bias (bool): Whether the cell has bias `b_ih` and `b_hh`. 3080 bidirectional (bool): Specifies whether it is a bidirectional LSTM. 3081 dropout (float): If not 0, append `Dropout` layer on the outputs of each 3082 LSTM layer except the last layer. The range of dropout is [0.0, 1.0]. 3083 proj_size (int): If `proj_size` > 0, a projection of the corresponding size will be used, 3084 which is only supported on CPU now. Default: ``0`` . 3085 3086 Inputs: 3087 - **input** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, input\_size)` or 3088 :math:`(batch\_size, seq\_len, input\_size)`. 3089 - **h** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, real\_hidden\_size)`. 3090 - **c** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`. 3091 - **w** (Tensor) - A weight Tensor. 3092 3093 If :math:`proj\_size > 0` , :math:`real\_hidden\_size = proj\_size` , otherwise 3094 :math:`real\_hidden\_size = hidden\_size` . 3095 3096 Outputs: 3097 Tuple, a tuple contains `(output, h_n, c_n, reserve, state)`. 3098 3099 - **output** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * real\_hidden\_size)`. 3100 - **h_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, real\_hidden\_size)`. 3101 - **c_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`. 3102 - **reserve** (Tensor) - Tensor of shape :math:`(r, 1)`. 3103 - **state** (Tensor) - Random number generator state and its shape is :math:`(s, 1)`. 3104 3105 Raises: 3106 TypeError: If `input_size`, `hidden_size` or `num_layers` is not an int. 3107 TypeError: If `has_bias` or `bidirectional` is not a bool. 3108 TypeError: If `dropout` is not a float. 3109 ValueError: If `dropout` is not in range [0.0, 1.0]. 3110 ValueError: If `proj_size` is not in range [0, `hidden_size`). 
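
    The flattened weight `w` in the example below can be sized with the usual LSTM parameter count
    (a rough check only; the exact packing order inside `w` is an implementation detail):

    >>> input_size, hidden_size = 10, 2
    >>> 4 * hidden_size * (input_size + hidden_size) + 2 * 4 * hidden_size  # one layer, unidirectional, with bias
    112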
    Supported Platforms:
        ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> input_size = 10
        >>> hidden_size = 2
        >>> num_layers = 1
        >>> seq_len = 5
        >>> batch_size = 2
        >>>
        >>> net = ops.LSTM(input_size, hidden_size, num_layers, True, False, 0.0)
        >>> input_tensor = Tensor(np.ones([seq_len, batch_size, input_size]).astype(np.float32))
        >>> h0 = Tensor(np.ones([num_layers, batch_size, hidden_size]).astype(np.float32))
        >>> c0 = Tensor(np.ones([num_layers, batch_size, hidden_size]).astype(np.float32))
        >>> w = Tensor(np.ones([112, 1, 1]).astype(np.float32))
        >>> output, hn, cn, _, _ = net(input_tensor, h0, c0, w)
        >>> print(output)
        [[[0.9640267 0.9640267 ]
          [0.9640267 0.9640267 ]]
         [[0.9950539 0.9950539 ]
          [0.9950539 0.9950539 ]]
         [[0.99932843 0.99932843]
          [0.99932843 0.99932843]]
         [[0.9999084 0.9999084 ]
          [0.9999084 0.9999084 ]]
         [[0.9999869 0.9999869 ]
          [0.9999869 0.9999869 ]]]
    """

    @prim_attr_register
    def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout, proj_size=0):
        """Initialize LSTM."""
        self.input_size = validator.check_positive_int(input_size, "input_size", self.name)
        self.hidden_size = validator.check_positive_int(hidden_size, "hidden_size", self.name)
        self.proj_size = validator.check_int_range(proj_size, 0, hidden_size, validator.INC_LEFT,
                                                   'proj_size', self.name)
        self.num_layers = validator.check_positive_int(num_layers, "num_layers", self.name)
        self.has_bias = validator.check_value_type("has_bias", has_bias, (bool,), self.name)
        self.bidirectional = validator.check_value_type("bidirectional", bidirectional, (bool,), self.name)
        self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
        self.dropout = validator.check_float_range(dropout, 0, 1, validator.INC_BOTH, 'dropout', self.name)

        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1

    def infer_shape(self, x_shape, h_shape, c_shape, w_shape):
        validator.check_equal_int(len(x_shape), 3, "x rank", self.name)
        validator.check_equal_int(x_shape[2], self.input_size, "x[2]", self.name)

        # h and c must have the same shape when no projection is used
        validator.check_equal_int(len(h_shape), 3, "h rank", self.name)
        if self.proj_size == 0:
            validator.check("h_shape", h_shape, "c_shape", c_shape, validator.EQ, self.name)

        real_hidden_size = self.proj_size if self.proj_size > 0 else self.hidden_size
        validator.check_int(h_shape[0], self.num_layers * self.num_directions, validator.EQ, "h[0]", self.name)
        validator.check_equal_int(h_shape[1], x_shape[1], "h[1]", self.name)
        validator.check_int(h_shape[2], real_hidden_size, validator.EQ, "h[2]", self.name)

        y_shape = (x_shape[0], x_shape[1], real_hidden_size * self.num_directions)

        # set arbitrary shape for reserved space
        reserved_shape = (1, 1)
        state_shape = (1, 1)
        return y_shape, h_shape, c_shape, reserved_shape, state_shape

    def infer_dtype(self, x_dtype, h_dtype, c_dtype, w_dtype):
        args = {'x': x_dtype, 'h': h_dtype, 'c': c_dtype, 'w': w_dtype}
        validator.check_tensors_dtypes_same_and_valid(args, (mstype.float32, mstype.float16), self.name)
        return x_dtype, x_dtype,
x_dtype, x_dtype, x_dtype 3188 3189 3190class SigmoidCrossEntropyWithLogits(Primitive): 3191 r""" 3192 Uses the given logits to compute sigmoid cross entropy between the logits and the label. 3193 3194 Measures the distribution error in discrete classification tasks where each class is independent 3195 and not mutually exclusive using cross entropy loss. 3196 3197 Sets input logits as :math:`X`, input label as :math:`Y`, output as :math:`loss`. Then, 3198 3199 .. math:: 3200 3201 \begin{array}{ll} \\ 3202 p_{ij} = sigmoid(X_{ij}) = \frac{1}{1 + e^{-X_{ij}}} \\ 3203 loss_{ij} = -[Y_{ij} * ln(p_{ij}) + (1 - Y_{ij})ln(1 - p_{ij})] 3204 \end{array} 3205 3206 Inputs: 3207 - **logits** (Tensor) - Input logits. Tensor of shape :math:`(N, *)` where :math:`*` means any number 3208 of additional dimensions. 3209 - **label** (Tensor) - Ground truth label. With the same shape and type as `logits`. 3210 3211 Outputs: 3212 Tensor, with the same shape and type as input `logits`. 3213 3214 Raises: 3215 TypeError: If `logits` or `label` is not a Tensor. 3216 3217 Supported Platforms: 3218 ``Ascend`` ``GPU`` ``CPU`` 3219 3220 Examples: 3221 >>> import numpy as np 3222 >>> from mindspore import Tensor, ops 3223 >>> logits = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]).astype(np.float32)) 3224 >>> labels = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]).astype(np.float32)) 3225 >>> sigmoid = ops.SigmoidCrossEntropyWithLogits() 3226 >>> output = sigmoid(logits, labels) 3227 >>> print(output) 3228 [[ 0.6111007 0.5032824 0.26318604] 3229 [ 0.58439666 0.5530153 -0.4368139 ]] 3230 """ 3231 3232 @prim_attr_register 3233 def __init__(self): 3234 """Initialize SigmoidCrossEntropyWithLogits""" 3235 self.init_prim_io_names(inputs=['predict', 'target'], outputs=['loss']) 3236 3237 3238class Pad(Primitive): 3239 r""" 3240 Pads the input tensor according to the paddings. 3241 3242 Refer to :func:`mindspore.ops.pad` for more details. Use :func:`mindspore.ops.pad` instead if `paddings` has 3243 negative values. 3244 3245 Args: 3246 paddings (tuple): The shape of parameter `paddings` is (N, 2). N is the rank of input data. All elements of 3247 paddings are int type. For the input in `D` th dimension, paddings[D, 0] indicates how many sizes to be 3248 extended ahead of the input tensor in the `D` th dimension, and paddings[D, 1] indicates how many sizes to 3249 be extended behind the input tensor in the `D` th dimension. 3250 3251 Inputs: 3252 - **input_x** (Tensor) - Tensor to be padded. It has shape :math:`(N, *)`, where :math:`*` means 3253 any number of additional dimensions. 3254 3255 Outputs: 3256 Tensor, the tensor after padding. 3257 3258 Raises: 3259 TypeError: If `paddings` is not a tuple. 3260 TypeError: If `input_x` is not a Tensor. 3261 ValueError: If shape of `paddings` is not :math:`(N, 2)`. 3262 ValueError: If paddings.size is not equal to 2 * len(input_x). 3263 3264 Supported Platforms: 3265 ``Ascend`` ``GPU`` ``CPU`` 3266 3267 Examples: 3268 >>> import mindspore 3269 >>> import numpy as np 3270 >>> from mindspore import Tensor, ops 3271 >>> input_x = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) 3272 >>> pad_op = ops.Pad(((1, 2), (2, 1))) 3273 >>> output = pad_op(input_x) 3274 >>> print(output) 3275 [[ 0. 0. 0. 0. 0. 0. ] 3276 [ 0. 0. -0.1 0.3 3.6 0. ] 3277 [ 0. 0. 0.4 0.5 -3.2 0. ] 3278 [ 0. 0. 0. 0. 0. 0. ] 3279 [ 0. 0. 0. 0. 0. 0. 
]]
    """

    @prim_attr_register
    def __init__(self, paddings):
        """Initialize Pad"""
        self.init_prim_io_names(inputs=['x'], outputs=['y'])
        validator.check_value_type("paddings", paddings, [tuple], self.name)
        self.paddings = paddings


class PadV3(Primitive):
    """
    Pads the input Tensor according to the `paddings`, `mode` and `paddings_contiguous`.

    Args:
        mode (str, optional): An optional string indicating the padding mode. Supported values are
            ``"constant"`` , ``"reflect"`` , ``"edge"`` and ``"circular"`` . Default: ``"constant"`` .
            The effects of the padding modes are as follows:

            - ``"constant"``: Pads the input Tensor with the value specified by `constant_value`.
            - ``"reflect"``: Pads the input Tensor by reflecting the values of the pixels at the
              boundary of the Tensor.
            - ``"edge"``: Pads the input Tensor with the values of the pixels on the border of the Tensor.
            - ``"circular"``: Circular padding mode. In this mode, the pixels from one edge of the image
              are wrapped around to the opposite edge, such that the pixel on the right edge of the
              image is replaced with the pixel on the left edge, and the pixel on the bottom edge
              is replaced with the pixel on the top edge.

        paddings_contiguous (bool, optional): An optional bool indicating how `paddings` is arranged.
            If ``True`` , `paddings` is arranged as [begin0, end0, begin1, end1, ...].
            If ``False`` , `paddings` is arranged as [begin0, begin1, ..., end0, end1, ...].
            Default: ``True`` .

    Inputs:
        - **x** (Tensor) - Tensor to be padded. It has shape :math:`(N, *)`, where :math:`*` means
          any number of additional dimensions.
        - **paddings** (Tensor) - Specifies the number of values to be padded before and after each
          dimension of the input Tensor `x`. It's a 1D Tensor of type int32 or int64.
        - **constant_value** (Tensor, optional) - Padding value to use in 'constant' mode,
          if not specified, 0 is used instead. It has the same type as `x`.

    Outputs:
        Tensor, the tensor after padding.

    Raises:
        TypeError: If `x` or `paddings` is not a Tensor.
        TypeError: If `paddings_contiguous` is not a bool.
        ValueError: If `mode` is not a str or is not one of the supported modes.
        ValueError: If `mode` is "constant" and the number of elements in `paddings` is not even.
        ValueError: If `mode` is "constant" and the number of elements in `paddings` is larger than the
            input dimension times 2.
        ValueError: If `mode` is "edge", "reflect" or "circular" and the number of elements in `paddings`
            is not 2, 4 or 6.
        ValueError: If `mode` is "edge", "reflect" or "circular", `x` has 3 dimensions and the number of
            elements in `paddings` is not 2.
        ValueError: If `mode` is "edge", "reflect" or "circular", `x` has 4 dimensions and the number of
            elements in `paddings` is not 4.
        ValueError: If `mode` is "circular", `x` has 5 dimensions and the number of elements in `paddings`
            is not 6.
        ValueError: If `mode` is "edge", "reflect" or "circular" and `x` has fewer than 3 dimensions.
        ValueError: If `mode` is "edge" or "circular" and `x` has more than 5 dimensions.
        ValueError: If `mode` is "reflect" and `x` has more than 4 dimensions.
        ValueError: If `mode` is "reflect" and a padding size is larger than the corresponding dimension of `x`.
        ValueError: If, after padding, any dimension of the output is not greater than 0.
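
    For instance, in case 2 of the examples below, ``paddings_contiguous=False`` makes ``[1, 0, 1, 0]``
    be read as the begin values ``[1, 0]`` followed by the end values ``[1, 0]``; the only non-zero entries
    pad the last dimension by one element on each side, so the ``(1, 3)`` input becomes ``(1, 5)`` and the
    new positions take `constant_value` (``1.5``).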
3341 3342 Supported Platforms: 3343 ``Ascend`` ``GPU`` ``CPU`` 3344 3345 Examples: 3346 >>> # case1: mode="reflect", paddings_contiguous=True 3347 >>> class Net(nn.Cell): 3348 ... def __init__(self, mode, paddings_contiguous): 3349 ... super(Net, self).__init__() 3350 ... self.pad = ops.PadV3(mode=mode, paddings_contiguous=paddings_contiguous) 3351 ... self.paddings = Tensor([1, 1]) 3352 ... def construct(self, x): 3353 ... return self.pad(x, self.paddings) 3354 ... 3355 >>> x = Tensor([[[0., 1.]]]) 3356 >>> pad = Net(mode="reflect", paddings_contiguous=True) 3357 >>> output = pad(x) 3358 >>> print(output) 3359 [[[1. 0. 1. 0.]]] 3360 >>> # case2: mode="constant", padding_contigous=False 3361 >>> class Net(nn.Cell): 3362 ... def __init__(self, mode, paddings_contiguous): 3363 ... super(Net, self).__init__() 3364 ... self.pad = ops.PadV3(mode=mode, paddings_contiguous=paddings_contiguous) 3365 ... self.paddings = Tensor([1, 0, 1, 0]) 3366 ... self.value = Tensor(1.5) 3367 ... def construct(self, x): 3368 ... return self.pad(x, self.paddings, self.value) 3369 ... 3370 >>> x = Tensor([[0., 1., 2.]]) 3371 >>> pad = Net(mode="constant", paddings_contiguous=False) 3372 >>> output = pad(x) 3373 >>> print(output) 3374 [[1.5 0. 1. 2. 1.5]] 3375 """ 3376 3377 @prim_attr_register 3378 def __init__(self, mode='constant', paddings_contiguous=True): 3379 """Initialize PadV3""" 3380 self.init_prim_io_names(inputs=['x', 'paddings', 'constant_value'], outputs=['y']) 3381 validator.check_string(mode, ['constant', 'reflect', 'edge', 'circular'], 'mode', self.name) 3382 validator.check_bool(paddings_contiguous, "paddings_contiguous", self.name) 3383 self.mode = mode 3384 self.paddings_contiguous = paddings_contiguous 3385 3386 3387class MirrorPad(Primitive): 3388 """ 3389 Pads the input tensor according to the paddings and mode. 3390 3391 Args: 3392 mode (str, optional): An optional string specifying the pad method. 3393 The optional values are ``'REFLECT'`` and ``'SYMMETRIC'`` . 3394 Default: ``'REFLECT'`` . 3395 3396 - ``'REFLECT'``: Reflect the value on the edge while omitting the last one. 3397 For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [3, 2, 1, 2, 3, 4, 3, 2]. 3398 - ``'SYMMETRIC'``: Reflect the value on the edge while repeating the last one. 3399 For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [2, 1, 1, 2, 3, 4, 4, 3]. 3400 3401 Inputs: 3402 - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of 3403 additional dimensions. 3404 - **paddings** (Tensor) - Paddings requires constant tensor. The value of `paddings` is a 3405 matrix(list), and its shape is :math:`(N, 2)`. N is the rank of input data. All elements of paddings 3406 are int type. For the input in the `D` th dimension, paddings[D, 0] indicates how many sizes 3407 to be extended ahead of the input tensor in the `D` th dimension, and paddings[D, 1] 3408 indicates how many sizes to be extended behind the input tensor in the `D` th dimension. Both 3409 paddings[D, 0] and paddings[D, 1] must be no greater than input_x.dim_size(D) 3410 (or input_x.dim_size(D) - 1) if mode is SYMMETRIC (if REFLECT, respectively). 3411 3412 Outputs: 3413 Tensor, the tensor after padding. 3414 3415 - If `mode` is ``'REFLECT'``, it uses a way of symmetrical copying through the axis of symmetry to fill in. 
3416 If the `input_x` is [[1,2,3], [4,5,6], [7,8,9]] and `paddings` is [[1,1], [2,2]], then the 3417 `Outputs` is [[6,5,4,5,6,5,4], [3,2,1,2,3,2,1], [6,5,4,5,6,5,4], [9,8,7,8,9,8,7], [6,5,4,5,6,5,4]]. 3418 For a more intuitive understanding, please see the example below. 3419 - If `mode` is ``'SYMMETRIC'``, the filling method is similar to the ``'REFLECT'``. It is also copied 3420 according to the symmetry axis, except that it includes the symmetry axis. If the `input_x` 3421 is [[1,2,3], [4,5,6], [7,8,9]] and `paddings` is [[1,1], [2,2]], then the `Outputs` is 3422 [[2,1,1,2,3,3,2], [2,1,1,2,3,3,2], [5,4,4,5,6,6,5], [8,7,7,8,9,9,8], [8,7,7,8,9,9,8]]. 3423 For a more intuitive understanding, please see the example below. 3424 3425 Raises: 3426 TypeError: If `input_x` or `paddings` is not a Tensor. 3427 TypeError: If `mode` is not a str. 3428 ValueError: If paddings.size is not equal to 2 * rank of input_x. 3429 3430 Supported Platforms: 3431 ``Ascend`` ``GPU`` ``CPU`` 3432 3433 Examples: 3434 >>> from mindspore import Tensor, nn, ops 3435 >>> # case1: mode="REFLECT" 3436 >>> class Net(nn.Cell): 3437 ... def __init__(self, mode): 3438 ... super(Net, self).__init__() 3439 ... self.pad = ops.MirrorPad(mode=mode) 3440 ... self.paddings = Tensor([[1, 1], [2, 2]]) 3441 ... def construct(self, input_x): 3442 ... return self.pad(input_x, self.paddings) 3443 ... 3444 >>> input_x = Tensor([[1,2,3], [4,5,6], [7,8,9]]) 3445 >>> pad = Net("REFLECT") 3446 >>> output = pad(input_x) 3447 >>> print(output) 3448 [[6 5 4 5 6 5 4] 3449 [3 2 1 2 3 2 1] 3450 [6 5 4 5 6 5 4] 3451 [9 8 7 8 9 8 7] 3452 [6 5 4 5 6 5 4]] 3453 >>> # case2: mode="SYMMETRIC" 3454 >>> pad = Net("SYMMETRIC") 3455 >>> output = pad(input_x) 3456 >>> print(output) 3457 [[2 1 1 2 3 3 2] 3458 [2 1 1 2 3 3 2] 3459 [5 4 4 5 6 6 5] 3460 [8 7 7 8 9 9 8] 3461 [8 7 7 8 9 9 8]] 3462 """ 3463 3464 @prim_attr_register 3465 def __init__(self, mode='REFLECT'): 3466 """Initialize Pad""" 3467 self.init_prim_io_names(inputs=['x', 'paddings'], outputs=['y']) 3468 validator.check_string(mode, ['REFLECT', 'SYMMETRIC'], 'mode', self.name) 3469 self.mode = mode 3470 3471 3472class ComputeAccidentalHits(Primitive): 3473 r""" 3474 Compute accidental hits of sampled classes which match target classes. 3475 3476 When a target class matches the sample class, we call it "accidental hit". 3477 The result of calculating accidental hits contain three parts (index, id, weight), 3478 where index represents the row number in true_classes, and id represents the position in sampled_candidates, 3479 the weight is FLOAT_MAX. FLOAT_MAX indicates the max value in the type of Float 3480 3481 Args: 3482 num_true (int): The number of target classes per training example. Default: ``1`` . 3483 3484 Inputs: 3485 - **true_classes** (Tensor) - The target classes. With data type of int64 3486 and shape :math:`(batch\_size, num\_true)`. 3487 - **sampled_candidates** (Tensor) - The Candidate sampling results of operators, types of training samples, 3488 with data type of int64 and shape :math:`(num\_sampled, )`. 3489 3490 Outputs: 3491 Tuple of 3 Tensors. 3492 3493 - **indices** (Tensor) - A Tensor with shape :math:`(num\_accidental\_hits, )`, 3494 with data type of int32. 3495 - **ids** (Tensor) - A Tensor with shape :math:`(num\_accidental\_hits, )`, 3496 with data type of int64. 3497 - **weights** (Tensor) - A Tensor with shape :math:`(num\_accidental\_hits, )`, with the type float32. 3498 3499 Raises: 3500 TypeError: If dtype of `num_true` is not int. 
        TypeError: If `true_classes` or `sampled_candidates` is not a Tensor.
        TypeError: If dtype of `true_classes` or `sampled_candidates` is neither int32 nor int64.

    Supported Platforms:
        ``Ascend``

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> true_classes = np.array([[1, 2], [0, 4], [3, 3]])
        >>> sampled_candidates = np.array([0, 1, 2, 3, 4])
        >>> sampler = ops.ComputeAccidentalHits(2)
        >>> indices, ids, weights = sampler(Tensor(true_classes), Tensor(sampled_candidates))
        >>> print(indices, ids, weights)
        [0 0 1 1 2 2]
        [1 2 0 4 3 3]
        [-3.4028235e+38 -3.4028235e+38 -3.4028235e+38 -3.4028235e+38 -3.4028235e+38 -3.4028235e+38]

    """

    @prim_attr_register
    def __init__(self, num_true=1):
        """Initialize ComputeAccidentalHits"""
        self.init_prim_io_names(inputs=['true_classes', 'sampled_candidates'],
                                outputs=['indices', 'ids', 'weights'])
        validator.check_value_type("num_true", num_true, [int], self.name)
        validator.check_number("num_true", num_true, 1, validator.GE, self.name)
        self.num_true = num_true


class ROIAlign(Primitive):
    r"""
    Computes the Region of Interest (RoI) Align operator.

    The operator computes the value of each sampling point by bilinear interpolation from the nearby grid points
    on the feature map. No quantization is performed on any coordinates involved in the RoI, its bins, or the
    sampling points. The details of the RoI Align operator are described in
    `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_.

    Args:
        pooled_height (int): The output features height.
        pooled_width (int): The output features width.
        spatial_scale (float): A scaling factor that maps the raw image coordinates to the input
            feature map coordinates. Suppose the height of a RoI is `ori_h` in the raw image and `fea_h` in the
            input feature map, the `spatial_scale` must be `fea_h / ori_h`.
        sample_num (int): Number of sampling points. Default: ``2`` .
        roi_end_mode (int): Must be 0 or 1. If roi_end_mode=0, use the legacy implementation.
            If roi_end_mode=1, the end pixel of the roi_box will be shifted by +1*spatial_scale. Default: ``1`` .

    Inputs:
        - **features** (Tensor) - The input features, whose shape must be :math:`(N, C, H, W)`, with data type of
          float16 or float32.
        - **rois** (Tensor) - The shape is :math:`(rois\_n, 5)`, with data type of float16 or float32.
          `rois_n` represents the number of RoIs. The size of the second dimension must be `5` and the `5` columns
          are :math:`(image\_index, top\_left\_x, top\_left\_y, bottom\_right\_x, bottom\_right\_y)`.
          `image_index` represents the index of the image. `top_left_x` and `top_left_y` represent the `x, y`
          coordinates of the top left corner of the corresponding RoI, respectively. `bottom_right_x` and
          `bottom_right_y` represent the `x, y` coordinates of the bottom right corner of the corresponding RoI,
          respectively.

    Outputs:
        Tensor, the shape is :math:`(rois\_n, C, pooled\_height, pooled\_width)`.

    Raises:
        TypeError: If `pooled_height`, `pooled_width`, `sample_num` or `roi_end_mode` is not an int.
        TypeError: If `spatial_scale` is not a float.
        TypeError: If `features` or `rois` is not a Tensor.
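
    For instance, if the backbone downsamples a 224-pixel-high image to a 14-pixel-high feature map,
    `spatial_scale` would be :math:`14 / 224 = 0.0625`; the examples below use ``spatial_scale=0.5``,
    i.e. the feature map is assumed to be half the linear size of the raw image.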
3567 3568 Supported Platforms: 3569 ``Ascend`` ``GPU`` ``CPU`` 3570 3571 Examples: 3572 >>> import mindspore 3573 >>> import numpy as np 3574 >>> from mindspore import Tensor, ops 3575 >>> features = Tensor(np.array([[[[1., 2.], [3., 4.]]]]), mindspore.float32) 3576 >>> rois = Tensor(np.array([[0, 0.2, 0.3, 0.2, 0.3]]), mindspore.float32) 3577 >>> roi_align = ops.ROIAlign(2, 2, 0.5, 2) 3578 >>> output = roi_align(features, rois) 3579 >>> print(output) 3580 [[[[1.775 2.025] 3581 [2.275 2.525]]]] 3582 """ 3583 3584 @prim_attr_register 3585 def __init__(self, pooled_height, pooled_width, spatial_scale, sample_num=2, roi_end_mode=1): 3586 """Initialize ROIAlign""" 3587 validator.check_value_type("pooled_height", pooled_height, [int], self.name) 3588 validator.check_value_type("pooled_width", pooled_width, [int], self.name) 3589 validator.check_value_type("spatial_scale", spatial_scale, [float], self.name) 3590 validator.check_value_type("sample_num", sample_num, [int], self.name) 3591 validator.check_value_type("roi_end_mode", roi_end_mode, [int], self.name) 3592 validator.check_int_range(roi_end_mode, 0, 1, validator.INC_BOTH, "roi_end_mode", self.name) 3593 self.pooled_height = pooled_height 3594 self.pooled_width = pooled_width 3595 self.spatial_scale = spatial_scale 3596 self.sample_num = sample_num 3597 self.roi_end_mode = roi_end_mode 3598 3599 3600class Adam(Primitive): 3601 r""" 3602 Updates gradients by the Adaptive Moment Estimation (Adam) algorithm. 3603 3604 The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_. 3605 3606 For more details, please refer to :class:`mindspore.nn.Adam`. 3607 3608 The updating formulas are as follows, 3609 3610 .. math:: 3611 \begin{array}{ll} \\ 3612 m = \beta_1 * m + (1 - \beta_1) * g \\ 3613 v = \beta_2 * v + (1 - \beta_2) * g * g \\ 3614 l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ 3615 w = w - l * \frac{m}{\sqrt{v} + \epsilon} 3616 \end{array} 3617 3618 :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents 3619 `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, 3620 :math:`t` represents updating step while :math:`beta_1^t(\beta_1^{t})` and :math:`beta_2^t(\beta_2^{t})` 3621 represent `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`, 3622 :math:`\epsilon` represents 3623 `epsilon`. 3624 3625 Inputs of `var`, `m`, `v` and `gradient` 3626 comply with the implicit type conversion rules to make the data types consistent. 3627 If they have different data types, the lower priority data type will be converted to 3628 the relatively highest priority data type. 3629 3630 Args: 3631 use_locking (bool): Whether to enable a lock to protect variable tensors from being updated. 3632 If ``True`` , updates of the var, m, and v tensors will be protected by a lock. 3633 If ``False`` , the result is unpredictable. Default: ``False`` . 3634 use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. 3635 If ``True`` , update the gradients using NAG. 3636 If ``False`` , update the gradients without using NAG. Default: ``False`` . 3637 3638 Inputs: 3639 - **var** (Parameter) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means, 3640 any number of additional dimensions. The data type can be float16 or float32. 
        - **m** (Parameter) - The 1st moment vector in the updating formula,
          the shape should be the same as `var`.
        - **v** (Parameter) - The 2nd moment vector in the updating formula,
          the shape should be the same as `var`.
        - **beta1_power** (float) - :math:`beta_1^t(\beta_1^{t})` in the updating formula.
        - **beta2_power** (float) - :math:`beta_2^t(\beta_2^{t})` in the updating formula.
        - **lr** (float) - :math:`l` in the updating formula. The paper suggested value is :math:`10^{-3}`.
        - **beta1** (float) - The exponential decay rate for the 1st moment estimations.
          The paper suggested value is :math:`0.9`.
        - **beta2** (float) - The exponential decay rate for the 2nd moment estimations.
          The paper suggested value is :math:`0.999`.
        - **epsilon** (float) - Term added to the denominator to improve numerical stability.
        - **gradient** (Tensor) - Gradient, has the same shape and data type as `var`.

    Outputs:
        Tuple of 3 Tensors, the updated parameters.

        - **var** (Tensor) - The same shape and data type as the input `var`.
        - **m** (Tensor) - The same shape and data type as the input `m`.
        - **v** (Tensor) - The same shape and data type as the input `v`.

    Raises:
        TypeError: If `use_locking` or `use_nesterov` is not a bool.
        TypeError: If `var`, `m` or `v` is not a Parameter.
        TypeError: If `beta1_power`, `beta2_power`, `lr`, `beta1`, `beta2`, `epsilon` or `gradient` is not a Tensor.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, nn, ops
        >>> from mindspore import Parameter
        >>> class Net(nn.Cell):
        ...     def __init__(self):
        ...         super(Net, self).__init__()
        ...         self.apply_adam = ops.Adam()
        ...         self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var")
        ...         self.m = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="m")
        ...         self.v = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="v")
        ...     def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
        ...         out = self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
        ...                               epsilon, grad)
        ...         return out
        ...
3687 >>> net = Net() 3688 >>> gradient = Tensor(np.ones([2, 2]).astype(np.float32)) 3689 >>> output = net(0.9, 0.999, 0.001, 0.9, 0.999, 1e-8, gradient) 3690 >>> print(net.var.asnumpy()) 3691 [[0.9996838 0.9996838] 3692 [0.9996838 0.9996838]] 3693 """ 3694 __mindspore_signature__ = ( 3695 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 3696 sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T1), 3697 sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T2), 3698 sig.make_sig('beta1_power', dtype=sig.sig_dtype.T3), 3699 sig.make_sig('beta2_power', dtype=sig.sig_dtype.T4), 3700 sig.make_sig('lr', dtype=sig.sig_dtype.T5), 3701 sig.make_sig('beta1', dtype=sig.sig_dtype.T6), 3702 sig.make_sig('beta2', dtype=sig.sig_dtype.T7), 3703 sig.make_sig('epsilon', dtype=sig.sig_dtype.T8), 3704 sig.make_sig('gradient', dtype=sig.sig_dtype.T) 3705 ) 3706 3707 @prim_attr_register 3708 def __init__(self, use_locking=False, use_nesterov=False): 3709 """Initialize Adam.""" 3710 validator.check_value_type("use_locking", use_locking, [bool], self.name) 3711 validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name) 3712 self.add_prim_attr('side_effect_mem', True) 3713 3714 3715class AdamNoUpdateParam(Primitive): 3716 r""" 3717 Updates gradients by the Adaptive Moment Estimation (Adam) algorithm. This operator do not update the parameter, but 3718 calculate the value that should be added to the parameter instead. 3719 3720 The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_. 3721 3722 The updating formulas are as follows, 3723 3724 .. math:: 3725 \begin{array}{ll} \\ 3726 m = \beta_1 * m + (1 - \beta_1) * g \\ 3727 v = \beta_2 * v + (1 - \beta_2) * g * g \\ 3728 l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ 3729 \Delta{w} = - l * \frac{m}{\sqrt{v} + \epsilon} 3730 \end{array} 3731 3732 :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents 3733 `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, 3734 :math:`t` represents updating step while :math:`beta_1^t(\beta_1^{t})` and :math:`beta_2^t(\beta_2^{t})` 3735 represent `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, 3736 :math:`w` represents the parameter to be updated, :math:`\epsilon` represents `epsilon`. 3737 3738 Args: 3739 use_locking (bool): Whether to enable a lock to protect variable tensors from being updated. 3740 If ``True`` , updates of the var, m, and v tensors will be protected by a lock. 3741 If ``False`` , the result is unpredictable. Default: ``False`` . 3742 use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. 3743 If ``True`` , update the gradients using NAG. 3744 If ``False`` , update the gradients without using NAG. Default: ``False`` . 3745 3746 Inputs: 3747 - **m** (Tensor) - The 1st moment vector in the updating formula. The shape is :math:`(N, *)` 3748 where :math:`*` means, any number of additional dimensions. The data type must be float32. 3749 - **v** (Tensor) - the 2nd moment vector in the updating formula. The shape must be the same as `m`. 3750 The data type must be float32. 3751 - **beta1_power** (Tensor) - :math:`beta_1^t(\beta_1^{t})` in the updating formula. 3752 The shape is :math:`(1, )` and the data type must be float32. 3753 - **beta2_power** (Tensor) - :math:`beta_2^t(\beta_2^{t})` in the updating formula. 
3754 The shape is :math:`(1, )` and the data type must be float32. 3755 - **lr** (Tensor) - :math:`l` in the updating formula. 3756 The shape is :math:`(1, )` and the data type must be float32. 3757 - **beta1** (Tensor) - The exponential decay rate for the 1st moment estimations. 3758 The shape is :math:`(1, )` and the data type must be float32. 3759 - **beta2** (Tensor) - The exponential decay rate for the 2nd moment estimations. 3760 The shape is :math:`(1, )` and the data type must be float32. 3761 - **epsilon** (Tensor) - Term added to the denominator to improve numerical stability. 3762 The shape is :math:`(1, )` and the data type must be float32. 3763 - **gradient** (Tensor) - Gradient, the shape must be the same as `m`, the data type must be float32. 3764 3765 Outputs: 3766 Tensor, whose shape and data type are the same with Inputs `gradient`, is a value that should be added to the 3767 parameter to be updated. 3768 3769 Raises: 3770 TypeError: If neither `use_locking` nor `use_nesterov` is a bool. 3771 TypeError: If `m`, `v`, `beta1_power`, `beta2_power1`, `lr`, `beta1`, `beta2`, `epsilon` or `gradient` 3772 is not a Tensor. 3773 3774 Supported Platforms: 3775 ``CPU`` 3776 3777 Examples: 3778 >>> class Net(nn.Cell): 3779 ... def __init__(self): 3780 ... super(Net, self).__init__() 3781 ... self.adam = ops.AdamNoUpdateParam() 3782 ... self.m = Parameter(Tensor(np.array([[0.1, 0.1, 0.1], [0.2, 0.2, 0.2]]).astype(np.float32)), 3783 ... name="m") 3784 ... self.v = Parameter(Tensor(np.array([[0.1, 0.1, 0.1], [0.2, 0.2, 0.2]]).astype(np.float32)), 3785 ... name="v") 3786 ... def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): 3787 ... out = self.adam(self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) 3788 ... return out 3789 >>> net = Net() 3790 >>> beta1_power = Tensor(0.9, ms.float32) 3791 >>> beta2_power = Tensor(0.999, ms.float32) 3792 >>> lr = Tensor(0.001, ms.float32) 3793 >>> beta1 = Tensor(0.9, ms.float32) 3794 >>> beta2 = Tensor(0.999, ms.float32) 3795 >>> epsilon = Tensor(1e-8, ms.float32) 3796 >>> gradient = Tensor(np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1]]).astype(np.float32)) 3797 >>> result = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient) 3798 >>> print(result) 3799 [[-0.00010004 -0.00010004 -0.00010004] 3800 [-0.00013441 -0.00013441 -0.00013441]] 3801 3802 """ 3803 3804 @prim_attr_register 3805 def __init__(self, use_locking=False, use_nesterov=False): 3806 """Initialize AdamNoUpdateParam.""" 3807 validator.check_value_type("use_locking", use_locking, [bool], self.name) 3808 validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name) 3809 3810 3811class FusedSparseAdam(Primitive): 3812 r""" 3813 Merges the duplicate value of the gradient and then updates parameters by the Adaptive Moment Estimation (Adam) 3814 algorithm. This operator is used when the gradient is sparse. 3815 3816 The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_. 3817 3818 The updating formulas are as follows, 3819 3820 .. 
math:: 3821 \begin{array}{ll} \\ 3822 m = \beta_1 * m + (1 - \beta_1) * g \\ 3823 v = \beta_2 * v + (1 - \beta_2) * g * g \\ 3824 l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ 3825 w = w - l * \frac{m}{\sqrt{v} + \epsilon} 3826 \end{array} 3827 3828 :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents 3829 `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, 3830 :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent `beta1_power` and 3831 `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`, :math:`\epsilon` represents 3832 `epsilon`. 3833 3834 All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent. 3835 If they have different data types, the lower priority data type will be converted to 3836 the relatively highest priority data type. 3837 3838 Args: 3839 use_locking (bool): Whether to enable a lock to protect variable tensors from being updated. 3840 If ``True`` , updates of the var, m, and v tensors will be protected by a lock. 3841 If ``False`` , the result is unpredictable. Default: ``False`` . 3842 use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. 3843 If ``True`` , update the gradients using NAG. 3844 If ``False`` , update the gradients without using NAG. Default: ``False`` . 3845 3846 Inputs: 3847 - **var** (Parameter) - Parameters to be updated with float32 data type. The shape is :math:`(N, *)` 3848 where :math:`*` means, any number of additional dimensions. 3849 - **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and data type as `var`. 3850 - **v** (Parameter) - The 2nd moment vector in the updating formula, has the same shape and data type as `var`. 3851 Mean square gradients, has the same type as `var` with float32 data type. 3852 - **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type. 3853 The shape is :math:`(1, )`. 3854 - **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type. 3855 The shape is :math:`(1, )`. 3856 - **lr** (Tensor) - :math:`l` in the updating formula. With float32 data type. 3857 The shape is :math:`(1, )`. 3858 - **beta1** (Tensor) - The exponential decay rate for the 1st moment estimations with float32 data type. 3859 The shape is :math:`(1, )`. 3860 - **beta2** (Tensor) - The exponential decay rate for the 2nd moment estimations with float32 data type. 3861 The shape is :math:`(1, )`. 3862 - **epsilon** (Tensor) - Term added to the denominator to improve numerical stability with float32 data type. 3863 The shape is :math:`(1, )`. 3864 - **gradient** (Tensor) - Gradient, has the same data type as `var` and 3865 gradient.shape[1:] = var.shape[1:] if var.shape > 1. 3866 - **indices** (Tensor) - Gradient indices with int32 data type and indices.shape[0] = gradient.shape[0]. 3867 3868 Outputs: 3869 Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless. 3870 3871 - **var** (Tensor) - A Tensor with shape :math:`(N, *)`. 3872 - **m** (Tensor) - A Tensor with shape :math:`(1, )`. 3873 - **v** (Tensor) - A Tensor with shape :math:`(1, )`. 3874 3875 Raises: 3876 TypeError: If neither `use_locking` nor `use_neserov` is a bool. 
3877 TypeError: If dtype of `var`, `m`, `v`, `beta1_power`, `beta2_power`, `lr`, `beta1`, `beta2`, `epsilon`, 3878 `gradient` or `indices` is not float32. 3879 RuntimeError: If the data type of all inputs except `indices` conversion of Parameter is not supported. 3880 3881 Supported Platforms: 3882 ``Ascend`` ``CPU`` 3883 3884 Examples: 3885 >>> class Net(nn.Cell): 3886 ... def __init__(self): 3887 ... super(Net, self).__init__() 3888 ... self.sparse_apply_adam = ops.FusedSparseAdam() 3889 ... self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var") 3890 ... self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m") 3891 ... self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v") 3892 ... def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices): 3893 ... out = self.sparse_apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, 3894 ... epsilon, grad, indices) 3895 ... return out 3896 ... 3897 >>> net = Net() 3898 >>> beta1_power = Tensor(0.9, mindspore.float32) 3899 >>> beta2_power = Tensor(0.999, mindspore.float32) 3900 >>> lr = Tensor(0.001, mindspore.float32) 3901 >>> beta1 = Tensor(0.9, mindspore.float32) 3902 >>> beta2 = Tensor(0.999, mindspore.float32) 3903 >>> epsilon = Tensor(1e-8, mindspore.float32) 3904 >>> gradient = Tensor(np.array([[[0.1, 0.1]], [[0.1, 0.1]]]), mindspore.float32) 3905 >>> indices = Tensor([0, 1], mindspore.int32) 3906 >>> output = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices) 3907 >>> print(net.var.asnumpy()) 3908 [[[0.9997121 0.9997121 ]] 3909 [[0.9997121 0.9997121 ]] 3910 [[0.99971527 0.99971527]]] 3911 """ 3912 __mindspore_signature__ = ( 3913 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 3914 sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 3915 sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 3916 sig.make_sig('beta1_power', dtype=sig.sig_dtype.T), 3917 sig.make_sig('beta2_power', dtype=sig.sig_dtype.T), 3918 sig.make_sig('lr', dtype=sig.sig_dtype.T), 3919 sig.make_sig('beta1', dtype=sig.sig_dtype.T), 3920 sig.make_sig('beta2', dtype=sig.sig_dtype.T), 3921 sig.make_sig('epsilon', dtype=sig.sig_dtype.T), 3922 sig.make_sig('grad', dtype=sig.sig_dtype.T), 3923 sig.make_sig('indices', dtype=sig.sig_dtype.T1) 3924 ) 3925 3926 @prim_attr_register 3927 def __init__(self, use_locking=False, use_nesterov=False): 3928 """Initialize FusedSparseAdam.""" 3929 validator.check_value_type("use_locking", use_locking, [bool], self.name) 3930 validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name) 3931 self.init_prim_io_names(inputs=['var', 'm', 'v', 'beta1_power', 'beta2_power', 'lr', 'beta1', 'beta2', 3932 'epsilon', 'grad', 'indices'], 3933 outputs=['var', 'm', 'v']) 3934 self.add_prim_attr('side_effect_mem', True) 3935 3936 3937class FusedSparseLazyAdam(Primitive): 3938 r""" 3939 Merges the duplicate value of the gradient and then updates parameters by the Adaptive Moment Estimation (Adam) 3940 algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the 3941 original Adam algorithm, as only the current indices parameters will be updated. 3942 3943 The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_. 3944 3945 The updating formulas are as follows, 3946 3947 .. 
math:: 3948 \begin{array}{ll} \\ 3949 m = \beta_1 * m + (1 - \beta_1) * g \\ 3950 v = \beta_2 * v + (1 - \beta_2) * g * g \\ 3951 l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ 3952 w = w - l * \frac{m}{\sqrt{v} + \epsilon} 3953 \end{array} 3954 3955 :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents 3956 `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, 3957 :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent `beta1_power` and 3958 `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`, :math:`\epsilon` represents 3959 `epsilon`. 3960 3961 All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent. 3962 If they have different data types, the lower priority data type will be converted to 3963 the relatively highest priority data type. 3964 3965 Args: 3966 use_locking (bool): Whether to enable a lock to protect variable tensors from being updated. 3967 If ``True`` , updates of the var, m, and v tensors will be protected by a lock. 3968 If ``False`` , the result is unpredictable. Default: ``False`` . 3969 use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. 3970 If ``True`` , update the gradients using NAG. 3971 If ``False`` , update the gradients without using NAG. Default: ``False`` . 3972 3973 Inputs: 3974 - **var** (Parameter) - Parameters to be updated with float32 data type. The shape is :math:`(N, *)` 3975 where :math:`*` means, any number of additional dimensions. 3976 - **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and data type as `var`. 3977 - **v** (Parameter) - The 2nd moment vector in the updating formula, has the same shape and data type as `var`. 3978 Mean square gradients, has the same type as `var` with float32 data type. 3979 - **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type. 3980 The shape is :math:`(1, )`. 3981 - **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type. 3982 The shape is :math:`(1, )`. 3983 - **lr** (Tensor) - :math:`l` in the updating formula with float32 data type. 3984 The shape is :math:`(1, )`. 3985 - **beta1** (Tensor) - The exponential decay rate for the 1st moment estimations with float32 data type. 3986 The shape is :math:`(1, )`. 3987 - **beta2** (Tensor) - The exponential decay rate for the 2nd moment estimations with float32 data type. 3988 The shape is :math:`(1, )`. 3989 - **epsilon** (Tensor) - Term added to the denominator to improve numerical stability with float32 data type. 3990 The shape is :math:`(1, )`. 3991 - **gradient** (Tensor) - Gradient value with float32 data type and 3992 gradient.shape[1:] = var.shape[1:] if var.shape > 1. 3993 - **indices** (Tensor) - Gradient indices with int32 data type and indices.shape[0] = gradient.shape[0]. 3994 3995 Outputs: 3996 Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless. 3997 3998 - **var** (Tensor) - A Tensor with shape :math:`(N, *)`. 3999 - **m** (Tensor) - A Tensor with shape :math:`(1, )`. 4000 - **v** (Tensor) - A Tensor with shape :math:`(1, )`. 4001 4002 Raises: 4003 TypeError: If neither `use_locking` nor `use_nestrov` is a bool. 
4004 TypeError: If dtype of `var`, `m`, `v`, `beta1_power`, `beta2_power`, `lr`, `beta1`, `beta2`, `epsilon` or 4005 gradient is not float32. 4006 TypeError: If dtype of `indices` is not int32. 4007 RuntimeError: If the data type of all inputs except `indices` conversion of Parameter is not supported. 4008 4009 Supported Platforms: 4010 ``Ascend`` ``CPU`` 4011 4012 Examples: 4013 >>> class Net(nn.Cell): 4014 ... def __init__(self): 4015 ... super(Net, self).__init__() 4016 ... self.sparse_apply_lazyadam = ops.FusedSparseLazyAdam() 4017 ... self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var") 4018 ... self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m") 4019 ... self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v") 4020 ... def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices): 4021 ... out = self.sparse_apply_lazyadam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, 4022 ... beta2, epsilon, grad, indices) 4023 ... return out 4024 ... 4025 >>> net = Net() 4026 >>> beta1_power = Tensor(0.9, mindspore.float32) 4027 >>> beta2_power = Tensor(0.999, mindspore.float32) 4028 >>> lr = Tensor(0.001, mindspore.float32) 4029 >>> beta1 = Tensor(0.9, mindspore.float32) 4030 >>> beta2 = Tensor(0.999, mindspore.float32) 4031 >>> epsilon = Tensor(1e-8, mindspore.float32) 4032 >>> gradient = Tensor(np.array([[[0.1, 0.1]], [[0.1, 0.1]]]), mindspore.float32) 4033 >>> indices = Tensor([0, 1], mindspore.int32) 4034 >>> output = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices) 4035 >>> print(net.var.asnumpy()) 4036 [[[0.9997121 0.9997121 ]] 4037 [[0.9997121 0.9997121 ]] 4038 [[1. 1. ]]] 4039 """ 4040 __mindspore_signature__ = ( 4041 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4042 sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4043 sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4044 sig.make_sig('beta1_power', dtype=sig.sig_dtype.T), 4045 sig.make_sig('beta2_power', dtype=sig.sig_dtype.T), 4046 sig.make_sig('lr', dtype=sig.sig_dtype.T), 4047 sig.make_sig('beta1', dtype=sig.sig_dtype.T), 4048 sig.make_sig('beta2', dtype=sig.sig_dtype.T), 4049 sig.make_sig('epsilon', dtype=sig.sig_dtype.T), 4050 sig.make_sig('grad', dtype=sig.sig_dtype.T), 4051 sig.make_sig('indices', dtype=sig.sig_dtype.T1) 4052 ) 4053 4054 @prim_attr_register 4055 def __init__(self, use_locking=False, use_nesterov=False): 4056 """Initialize FusedSparseLazyAdam.""" 4057 validator.check_value_type("use_locking", use_locking, [bool], self.name) 4058 validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name) 4059 self.init_prim_io_names(inputs=['var', 'm', 'v', 'beta1_power', 'beta2_power', 'lr', 'beta1', 'beta2', 4060 'epsilon', 'grad', 'indices'], 4061 outputs=['var', 'm', 'v']) 4062 self.add_prim_attr('side_effect_mem', True) 4063 4064 4065class FusedSparseFtrl(Primitive): 4066 """ 4067 Merges the duplicate value of the gradient and then updates relevant entries according to the FTRL-proximal scheme. 4068 4069 All inputs except `indices` comply with the implicit type conversion rules to make the data types consistent. 4070 If they have different data types, the lower priority data type will be converted to 4071 the relatively highest priority data type. 4072 4073 Args: 4074 lr (float): The learning rate value, must be positive. 4075 l1 (float): l1 regularization strength, must be greater than or equal to zero. 
        l2 (float): l2 regularization strength, must be greater than or equal to zero.
        lr_power (float): Learning rate power controls how the learning rate decreases during training,
            must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
        use_locking (bool): If ``True`` , use locks for the update operation. Default: ``False`` .

    Inputs:
        - **var** (Parameter) - The variable to be updated. The data type must be float32. The shape is :math:`(N, *)`
          where :math:`*` means, any number of additional dimensions.
        - **accum** (Parameter) - The accumulation to be updated, must be same type and shape as `var`.
        - **linear** (Parameter) - The linear coefficient to be updated, must be same type and shape as `var`.
        - **grad** (Tensor) - A tensor of the same type as `var` and
          grad.shape[1:] = var.shape[1:] if var.shape > 1.
        - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
          The type must be int32 and indices.shape[0] = grad.shape[0].

    Outputs:
        Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless.

        - **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
        - **accum** (Tensor) - A Tensor with shape :math:`(1, )`.
        - **linear** (Tensor) - A Tensor with shape :math:`(1, )`.

    Raises:
        TypeError: If `lr`, `l1`, `l2` or `lr_power` is not a float.
        ValueError: If `lr` is not positive, or if `lr_power` is greater than zero.
        TypeError: If dtype of `var` is not float32.
        TypeError: If dtype of `indices` is not int32.
        TypeError: If shape of `accum`, `linear` or `grad` is not the same as `var`.
        TypeError: If shape of `indices` is not the same as the shape of the first dimension of `grad`.
        RuntimeError: If the data type of all of inputs except `indices` conversion of Parameter is not supported.

    Supported Platforms:
        ``Ascend`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, nn, ops, Parameter
        >>> class SparseApplyFtrlNet(nn.Cell):
        ...     def __init__(self):
        ...         super(SparseApplyFtrlNet, self).__init__()
        ...         self.sparse_apply_ftrl = ops.FusedSparseFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
        ...         self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var")
        ...         self.accum = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="accum")
        ...         self.linear = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="linear")
        ...
        ...     def construct(self, grad, indices):
        ...         out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)
        ...         return out
        ...
        >>> net = SparseApplyFtrlNet()
        >>> grad = Tensor(np.array([[[0.1, 0.1]], [[0.1, 0.1]]]).astype(np.float32))
        >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
        >>> output = net(grad, indices)
        >>> print(net.var.asnumpy())
        [[[-0.00598256 -0.00598256]]
         [[-0.00598256 -0.00598256]]
         [[ 1.          1.
]]] 4131 """ 4132 __mindspore_signature__ = ( 4133 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4134 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4135 sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4136 sig.make_sig('grad', dtype=sig.sig_dtype.T), 4137 sig.make_sig('indices', dtype=sig.sig_dtype.T1) 4138 ) 4139 4140 @prim_attr_register 4141 def __init__(self, lr, l1, l2, lr_power, use_locking=False): 4142 """Initialize FusedSparseFtrl.""" 4143 self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'indices'], 4144 outputs=['output']) 4145 self.add_prim_attr('side_effect_mem', True) 4146 4147 validator.check_value_type("lr", lr, [float], self.name) 4148 validator.check_value_type("l1", l1, [float], self.name) 4149 validator.check_value_type("l2", l2, [float], self.name) 4150 validator.check_value_type("lr_power", lr_power, [float], self.name) 4151 self.lr = validator.check_positive_float(lr, "lr", self.name) 4152 self.l1 = validator.check_non_negative_float(l1, "l1", self.name) 4153 self.l2 = validator.check_non_negative_float(l2, "l2", self.name) 4154 self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name) 4155 self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) 4156 4157 4158class FusedSparseProximalAdagrad(Primitive): 4159 r""" 4160 Merges the duplicate value of the gradient and then updates relevant entries according to the proximal adagrad 4161 algorithm. 4162 4163 .. math:: 4164 \begin{array}{ll} \\ 4165 accum += grad * grad \\ 4166 \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}} \\ 4167 var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0) 4168 \end{array} 4169 4170 All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent. 4171 If they have different data types, the lower priority data type will be converted to 4172 the relatively highest priority data type. 4173 4174 Args: 4175 use_locking (bool): If ``True`` , the variable and accumulation tensors will be protected from being updated. 4176 Default: ``False`` . 4177 4178 Inputs: 4179 - **var** (Parameter) - Variable tensor to be updated. The data type must be float32. 4180 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 4181 - **accum** (Parameter) - Variable tensor to be updated, has the same shape and data type as `var`. 4182 - **lr** (Tensor) - The learning rate value. The data type must be float32. The shape is :math:`(1, )`. 4183 - **l1** (Tensor) - l1 regularization strength. The data type must be float32. The shape is :math:`(1, )`. 4184 - **l2** (Tensor) - l2 regularization strength. The data type must be float32. The shape is :math:`(1, )`. 4185 - **grad** (Tensor) - A tensor of the same data type as `var` and 4186 grad.shape[1:] = var.shape[1:] if var.shape > 1. 4187 - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`. 4188 The type must be int32 and indices.shape[0] = grad.shape[0]. 4189 4190 Outputs: 4191 Tuple of 2 Tensors, this operator will update the input parameters directly, the outputs are useless. 4192 4193 - **var** (Tensor) - A Tensor with shape :math:`(N, *)`. 4194 - **accum** (Tensor) - A Tensor with shape :math:`(1, )`. 4195 4196 Raises: 4197 TypeError: If `use_locking` is not a bool. 4198 TypeError: If dtype of `var`, `accum`, `lr`, `l1`, `l2` or `grad` is not float32. 
4199 TypeError: If dtype of `indices` is not int32. 4200 RuntimeError: If the data type of all inputs except `indices` conversion of Parameter is not supported. 4201 4202 Supported Platforms: 4203 ``Ascend`` ``CPU`` 4204 4205 Examples: 4206 >>> class Net(nn.Cell): 4207 ... def __init__(self): 4208 ... super(Net, self).__init__() 4209 ... self.sparse_apply_proximal_adagrad = ops.FusedSparseProximalAdagrad() 4210 ... self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var") 4211 ... self.accum = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="accum") 4212 ... self.lr = Tensor(0.01, mindspore.float32) 4213 ... self.l1 = Tensor(0.0, mindspore.float32) 4214 ... self.l2 = Tensor(0.0, mindspore.float32) 4215 ... def construct(self, grad, indices): 4216 ... out = self.sparse_apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1, 4217 ... self.l2, grad, indices) 4218 ... return out 4219 ... 4220 >>> net = Net() 4221 >>> grad = Tensor(np.array([[[0.1, 0.1]], [[0.1, 0.1]]]).astype(np.float32)) 4222 >>> indices = Tensor(np.array([0, 1]).astype(np.int32)) 4223 >>> output = net(grad, indices) 4224 >>> print(net.var.asnumpy()) 4225 [[[0.99900496 0.99900496]] 4226 [[0.99900496 0.99900496]] 4227 [[1. 1. ]]] 4228 """ 4229 __mindspore_signature__ = ( 4230 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4231 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4232 sig.make_sig('lr', dtype=sig.sig_dtype.T), 4233 sig.make_sig('l1', dtype=sig.sig_dtype.T), 4234 sig.make_sig('l2', dtype=sig.sig_dtype.T), 4235 sig.make_sig('grad', dtype=sig.sig_dtype.T), 4236 sig.make_sig('indices', dtype=sig.sig_dtype.T1) 4237 ) 4238 4239 @prim_attr_register 4240 def __init__(self, use_locking=False): 4241 """Initialize FusedSparseProximalAdagrad""" 4242 self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad', 'indices'], 4243 outputs=['output']) 4244 self.add_prim_attr('side_effect_mem', True) 4245 self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) 4246 4247 4248class KLDivLoss(Primitive): 4249 r""" 4250 Computes the Kullback-Leibler divergence between the logits and the labels. 4251 4252 For tensors of the same shape :math:`x` and :math:`target`, 4253 the updating formulas of KLDivLoss algorithm are as follows, 4254 4255 .. math:: 4256 L(x, target) = target \cdot (\log target - x) 4257 4258 Then, 4259 4260 .. math:: 4261 \ell(x, target) = \begin{cases} 4262 L(x, target), & \text{if reduction} = \text{'none';}\\ 4263 \operatorname{mean}(L(x, target)), & \text{if reduction} = \text{'mean';}\\ 4264 \operatorname{sum}(L(x, target)) / x.\operatorname{shape}[0], & \text{if reduction} = \text{'batchmean';}\\ 4265 \operatorname{sum}(L(x, target)), & \text{if reduction} = \text{'sum'.} 4266 \end{cases} 4267 4268 where :math:`x` represents `logits`, 4269 :math:`target` represents `labels`, and 4270 :math:`\ell(x, target)` represents `output`. 4271 4272 Note: 4273 - On Ascend, float64 dtype is not currently supported. 4274 - The output aligns with the mathematical definition of Kullback-Leibler divergence 4275 only when `reduction` is set to ``'batchmean'``. 4276 - On Ascend, the value of `reduction` must be one of ``'batchmean'``, ``'none'`` or ``'sum'``. 4277 - On GPU, the value of `reduction` must be one of ``'mean'``, ``'none'`` or ``'sum'``. 4278 - On CPU, the value of `reduction` must be one of ``'mean'``, ``'batchmean'``, ``'none'`` 4279 or ``'sum'``. 
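
    The element-wise formula and the reductions above can be sketched in plain NumPy
    (an illustrative reference only, not the operator implementation; `logits` are treated
    as log-probabilities, as in the formula):

        >>> import numpy as np
        >>> logits = np.log(np.array([0.2, 0.7, 0.1], dtype=np.float32))
        >>> labels = np.array([0.3, 0.5, 0.2], dtype=np.float32)
        >>> pointwise = labels * (np.log(labels) - logits)        # L(x, target)
        >>> mean_loss = pointwise.mean()                          # reduction='mean'
        >>> batchmean_loss = pointwise.sum() / logits.shape[0]    # reduction='batchmean'
        >>> sum_loss = pointwise.sum()                            # reduction='sum'
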
4280 4281 Args: 4282 reduction (str): Specifies the reduction to be applied to the output. 4283 Default: ``'mean'`` . 4284 4285 - ``'none'``: no reduction will be applied. 4286 - ``'mean'``: compute and return the mean of elements in the output. 4287 - ``'sum'``: the output elements will be summed. 4288 - ``'batchmean'``: average loss is taken over the batch, similar to the mean mode. 4289 4290 Inputs: 4291 - **logits** (Tensor) - The input Tensor. The data type must be float16, float32 or float64. 4292 - **labels** (Tensor) - The label Tensor which has the same shape and data type as `logits`. 4293 4294 Outputs: 4295 Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`. 4296 Otherwise it is a scalar. 4297 4298 Raises: 4299 TypeError: If `reduction` is not a str. 4300 TypeError: If neither `logits` nor `labels` is a Tensor. 4301 TypeError: If dtype of `logits` or `labels` is not currently supported. 4302 ValueError: If shape of `logits` is not the same as `labels`. 4303 RuntimeError: If `logits` or `labels` is a scalar when `reduction` is 'batchmean'. 4304 4305 Supported Platforms: 4306 ``Ascend`` ``GPU`` ``CPU`` 4307 4308 Examples: 4309 >>> import mindspore 4310 >>> import numpy as np 4311 >>> from mindspore import Tensor, nn, ops 4312 >>> class Net(nn.Cell): 4313 ... def __init__(self): 4314 ... super(Net, self).__init__() 4315 ... self.kldiv_loss = ops.KLDivLoss(reduction='sum') 4316 ... def construct(self, logits, labels): 4317 ... result = self.kldiv_loss(logits, labels) 4318 ... return result 4319 ... 4320 >>> net = Net() 4321 >>> logits = Tensor(np.array([0.2, 0.7, 0.1]), mindspore.float32) 4322 >>> labels = Tensor(np.array([0., 1., 0.]), mindspore.float32) 4323 >>> output = net(logits, labels) 4324 >>> print(output) 4325 -0.7 4326 """ 4327 4328 @prim_attr_register 4329 def __init__(self, reduction='mean'): 4330 """Initialize KLDivLoss.""" 4331 device_target = context.get_context("device_target") 4332 if device_target == "CPU": 4333 support_mode = ['none', 'mean', 'batchmean', 'sum'] 4334 elif device_target == "GPU": 4335 support_mode = ['none', 'mean', 'sum'] 4336 elif device_target == "Ascend": 4337 support_mode = ['none', 'batchmean', 'sum', 'mean'] 4338 else: 4339 raise ValueError(f"'{self.name}' unknown device target: '{device_target}'") 4340 4341 self.reduction = validator.check_string(reduction, support_mode, 'reduction', self.name) 4342 4343 4344class ApplyAdaMax(Primitive): 4345 r""" 4346 Updates relevant entries according to the adamax scheme. 4347 4348 The updating formulas are as follows, 4349 4350 .. math:: 4351 \begin{array}{ll} \\ 4352 m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\ 4353 v_{t+1} = \max(\beta_2 * v_{t}, \left| g \right|) \\ 4354 var = var - \frac{l}{1 - \beta_1^{t+1}} * \frac{m_{t+1}}{v_{t+1} + \epsilon} 4355 \end{array} 4356 4357 :math:`t` represents updating step while :math:`m` represents the 1st moment vector, :math:`m_{t}` 4358 is the last moment of :math:`m_{t+1}`, :math:`v` represents the 2nd moment vector, :math:`v_{t}` 4359 is the last moment of :math:`v_{t+1}`, :math:`l` represents scaling factor `lr`, 4360 :math:`g` represents `grad`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, 4361 :math:`\beta_1^{t+1}` represents `beta1_power`, :math:`var` represents the variable to be updated, 4362 :math:`\epsilon` represents `epsilon`. 4363 4364 Inputs of `var`, `m`, `v` and `grad` comply with the implicit type conversion rules 4365 to make the data types consistent. 
4366 If they have different data types, the lower priority data type will be converted to 4367 the relatively highest priority data type. 4368 4369 Inputs: 4370 - **var** (Parameter) - Variable to be updated. With float32 or float16 data type. 4371 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 4372 - **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape as `var`. 4373 With float32 or float16 data type. 4374 - **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients 4375 with the same shape as `var`. With float32 or float16 data type. 4376 - **beta1_power** (Union[Number, Tensor]) - :math:`beta_1^t` in the updating formula, must be a scalar. 4377 With float32 or float16 data type. 4378 - **lr** (Union[Number, Tensor]) - Learning rate, :math:`l` in the updating formula, must be a scalar. 4379 With float32 or float16 data type. 4380 - **beta1** (Union[Number, Tensor]) - The exponential decay rate for the 1st moment estimations, 4381 must be a scalar. With float32 or float16 data type. 4382 - **beta2** (Union[Number, Tensor]) - The exponential decay rate for the 2nd moment estimations, 4383 must be a scalar. With float32 or float16 data type. 4384 - **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be a scalar. 4385 With float32 or float16 data type. 4386 - **grad** (Tensor) - A tensor for gradient, has the same shape as `var`. 4387 With float32 or float16 data type. 4388 4389 Outputs: 4390 Tuple of 3 Tensor, the updated parameters. 4391 4392 - **var** (Tensor) - The same shape and data type as `var`. 4393 - **m** (Tensor) - The same shape and data type as `m`. 4394 - **v** (Tensor) - The same shape and data type as `v`. 4395 4396 Raises: 4397 TypeError: If dtype of `var`, `m`, `v`, `beta_power`, `lr`, `beta1`, `beta2`, `epsilon` or `grad` is neither 4398 float16 nor float32. 4399 TypeError: If `beta_power`, `lr`, `beta1`, `beta2` or `epsilon` is neither a Number nor a Tensor. 4400 TypeError: If `grad` is not a Tensor. 4401 TypeError: If the data type of `var`, `m`, `v` and `grad` conversion of Parameter is not supported. 4402 4403 Supported Platforms: 4404 ``Ascend`` ``GPU`` ``CPU`` 4405 4406 Examples: 4407 >>> import mindspore 4408 >>> import numpy as np 4409 >>> from mindspore import Tensor, nn, ops, Parameter 4410 >>> class Net(nn.Cell): 4411 ... def __init__(self): 4412 ... super(Net, self).__init__() 4413 ... self.apply_ada_max = ops.ApplyAdaMax() 4414 ... self.var = Parameter(Tensor(np.array([[0.6, 0.4], 4415 ... [0.1, 0.5]]).astype(np.float32)), name="var") 4416 ... self.m = Parameter(Tensor(np.array([[0.6, 0.5], 4417 ... [0.2, 0.6]]).astype(np.float32)), name="m") 4418 ... self.v = Parameter(Tensor(np.array([[0.9, 0.1], 4419 ... [0.7, 0.8]]).astype(np.float32)), name="v") 4420 ... def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad): 4421 ... out = self.apply_ada_max(self.var, self.m, self.v, beta1_power, lr, beta1, beta2, epsilon, grad) 4422 ... return out 4423 ... 
4424 >>> net = Net() 4425 >>> beta1_power =Tensor(0.9, mindspore.float32) 4426 >>> lr = Tensor(0.001, mindspore.float32) 4427 >>> beta1 = Tensor(0.9, mindspore.float32) 4428 >>> beta2 = Tensor(0.99, mindspore.float32) 4429 >>> epsilon = Tensor(1e-10, mindspore.float32) 4430 >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32)) 4431 >>> output = net(beta1_power, lr, beta1, beta2, epsilon, grad) 4432 >>> print(output) 4433 (Tensor(shape=[2, 2], dtype=Float32, value= 4434 [[ 5.93602717e-01, 3.92571449e-01], 4435 [ 9.72582996e-02, 4.92249995e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 4436 [[ 5.69999993e-01, 5.19999981e-01], 4437 [ 1.89999998e-01, 6.20000005e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 4438 [[ 8.90999973e-01, 6.99999988e-01], 4439 [ 6.93000019e-01, 8.00000012e-01]])) 4440 """ 4441 4442 __mindspore_signature__ = ( 4443 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4444 sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4445 sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4446 sig.make_sig('beta1_power', dtype=sig.sig_dtype.T1), 4447 sig.make_sig('lr', dtype=sig.sig_dtype.T2), 4448 sig.make_sig('beta1', dtype=sig.sig_dtype.T3), 4449 sig.make_sig('beta2', dtype=sig.sig_dtype.T4), 4450 sig.make_sig('epsilon', dtype=sig.sig_dtype.T5), 4451 sig.make_sig('grad', dtype=sig.sig_dtype.T) 4452 ) 4453 4454 @prim_attr_register 4455 def __init__(self): 4456 """Initialize ApplyAdaMax""" 4457 self.add_prim_attr('side_effect_mem', True) 4458 4459 4460class ApplyAdadelta(Primitive): 4461 r""" 4462 Updates relevant entries according to the adadelta scheme. 4463 4464 The Adadelta algorithm is proposed in 4465 `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_. 4466 4467 .. math:: 4468 \begin{array}{ll} \\ 4469 \text{accum} = \rho * \text{accum} + (1 - \rho) * \text{grad}^2 \\ 4470 \text{update} = \sqrt{\text{accum_update} + 4471 \epsilon} * \frac{\text{grad}}{\sqrt{\text{accum} + \epsilon}} \\ 4472 \text{accum_update} = \rho * \text{accum_update} + (1 - \rho) * \text{update}^2 \\ 4473 \text{var} = \text{var} - \text{lr} * \text{update} 4474 \end{array} 4475 4476 where :math:`\rho` represents `rho`, :math:`\epsilon` represents `epsilon`. 4477 4478 Inputs of `var`, `accum`, `accum_update` and `grad` comply with the implicit type conversion rules 4479 to make the data types consistent. 4480 If they have different data types, the lower priority data type will be converted to 4481 the relatively highest priority data type. 4482 4483 Inputs: 4484 - **var** (Parameter) - Weights to be updated. With float32 or float16 data type. 4485 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 4486 - **accum** (Parameter) - Accumulation to be updated, has the same shape and data type as `var`. 4487 - **accum_update** (Parameter) - Accum_update to be updated, has the same shape and data type as `var`. 4488 - **lr** (Union[Number, Tensor]) - Learning rate, must be a scalar. With float32 or float16 data type. 4489 - **rho** (Union[Number, Tensor]) - Decay rate, must be a scalar. With float32 or float16 data type. 4490 - **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be a scalar. 4491 With float32 or float16 data type. 4492 - **grad** (Tensor) - Gradients, has the same shape and data type as `var`. 4493 4494 Outputs: 4495 Tuple of 3 Tensor, the updated parameters. 4496 4497 - **var** (Tensor) - The same shape and data type as `var`. 
4498 - **accum** (Tensor) - The same shape and data type as `accum`. 4499 - **accum_update** (Tensor) - The same shape and data type as `accum_update`. 4500 4501 Raises: 4502 TypeError: If dtype of `var`, `accum`, `accum_update`, `lr`, `rho`, `epsilon` or `grad` is neither float16 nor 4503 float32. 4504 TypeError: If `accum_update`, `lr`, `rho` or `epsilon` is neither a Number nor a Tensor. 4505 TypeError: If the data type of `var`, `accum`, `accum_update` and `grad` conversion of Parameter 4506 is not supported. 4507 4508 Supported Platforms: 4509 ``Ascend`` ``GPU`` ``CPU`` 4510 4511 Examples: 4512 >>> import numpy as np 4513 >>> import mindspore 4514 >>> from mindspore import nn, Tensor, ops, Parameter 4515 >>> class Net(nn.Cell): 4516 ... def __init__(self): 4517 ... super(Net, self).__init__() 4518 ... self.apply_adadelta = ops.ApplyAdadelta() 4519 ... self.var = Parameter(Tensor(np.array([[0.6, 0.4], 4520 ... [0.1, 0.5]]).astype(np.float32)), name="var") 4521 ... self.accum = Parameter(Tensor(np.array([[0.6, 0.5], 4522 ... [0.2, 0.6]]).astype(np.float32)), name="accum") 4523 ... self.accum_update = Parameter(Tensor(np.array([[0.9, 0.1], 4524 ... [0.7, 0.8]]).astype(np.float32)), 4525 ... name="accum_update") 4526 ... def construct(self, lr, rho, epsilon, grad): 4527 ... out = self.apply_adadelta(self.var, self.accum, self.accum_update, lr, rho, epsilon, grad) 4528 ... return out 4529 ... 4530 >>> net = Net() 4531 >>> lr = Tensor(0.001, mindspore.float32) 4532 >>> rho = Tensor(0.0, mindspore.float32) 4533 >>> epsilon = Tensor(1e-6, mindspore.float32) 4534 >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32)) 4535 >>> output = net(lr, rho, epsilon, grad) 4536 >>> print(output) 4537 (Tensor(shape=[2, 2], dtype=Float32, value= 4538 [[ 5.99051356e-01, 3.99683774e-01], 4539 [ 9.91633832e-02, 4.99105573e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 4540 [[ 9.00000036e-02, 4.89999980e-01], 4541 [ 1.00000007e-02, 6.40000045e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 4542 [[ 8.99990857e-01, 1.00000791e-01], 4543 [ 6.99930906e-01, 7.99999774e-01]])) 4544 """ 4545 4546 __mindspore_signature__ = ( 4547 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4548 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4549 sig.make_sig('accum_update', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4550 sig.make_sig('lr', dtype=sig.sig_dtype.T1), 4551 sig.make_sig('rho', dtype=sig.sig_dtype.T2), 4552 sig.make_sig('epsilon', dtype=sig.sig_dtype.T3), 4553 sig.make_sig('grad', dtype=sig.sig_dtype.T) 4554 ) 4555 4556 @prim_attr_register 4557 def __init__(self): 4558 """Initialize ApplyAdadelta""" 4559 self.add_prim_attr('side_effect_mem', True) 4560 4561 4562class ApplyAdagrad(Primitive): 4563 r""" 4564 Updates relevant entries according to the adagrad scheme. 4565 The Adagrad algorithm was proposed in 4566 `Adaptive Subgradient Methods for Online Learning and Stochastic Optimization 4567 <http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf>`_. 4568 This module can adaptively assign different learning rates for each parameter in view of the uneven number 4569 of samples for different parameters. 4570 4571 .. math:: 4572 \begin{array}{ll} \\ 4573 accum += grad * grad \\ 4574 var -= lr * grad * \frac{1}{\sqrt{accum}} 4575 \end{array} 4576 4577 Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules 4578 to make the data types consistent. 
4579 If they have different data types, the lower priority data type will be converted to 4580 the relatively highest priority data type. 4581 4582 Args: 4583 update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` . 4584 4585 Inputs: 4586 - **var** (Parameter) - Variable to be updated. With float or complex data type. 4587 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 4588 - **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`. 4589 - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With float or complex data type. 4590 - **grad** (Tensor) - A tensor for gradient. The shape must be the same as `var`. 4591 4592 Outputs: 4593 Tuple of 2 Tensors, the updated parameters. 4594 4595 - **var** (Tensor) - The same shape and data type as `var`. 4596 - **accum** (Tensor) - The same shape and data type as `accum`. 4597 4598 Raises: 4599 TypeError: If dtype of `var`, `accum`, `lr` or `grad` is neither float nor complex. 4600 TypeError: If `lr` is neither a Number nor a Tensor. 4601 TypeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported. 4602 4603 Supported Platforms: 4604 ``Ascend`` ``GPU`` ``CPU`` 4605 4606 Examples: 4607 >>> import mindspore 4608 >>> import numpy as np 4609 >>> from mindspore import Tensor, nn, ops, Parameter 4610 >>> class Net(nn.Cell): 4611 ... def __init__(self): 4612 ... super(Net, self).__init__() 4613 ... self.apply_adagrad = ops.ApplyAdagrad() 4614 ... self.var = Parameter(Tensor(np.array([[0.6, 0.4], 4615 ... [0.1, 0.5]]).astype(np.float32)), name="var") 4616 ... self.accum = Parameter(Tensor(np.array([[0.6, 0.5], 4617 ... [0.2, 0.6]]).astype(np.float32)), name="accum") 4618 ... def construct(self, lr, grad): 4619 ... out = self.apply_adagrad(self.var, self.accum, lr, grad) 4620 ... return out 4621 ... 4622 >>> net = Net() 4623 >>> lr = Tensor(0.001, mindspore.float32) 4624 >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32)) 4625 >>> output = net(lr, grad) 4626 >>> print(output) 4627 (Tensor(shape=[2, 2], dtype=Float32, value= 4628 [[ 5.99638879e-01, 3.99296492e-01], 4629 [ 9.97817814e-02, 4.99281585e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 4630 [[ 6.90000057e-01, 9.90000010e-01], 4631 [ 2.10000008e-01, 1.24000001e+00]])) 4632 """ 4633 4634 __mindspore_signature__ = ( 4635 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4636 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4637 sig.make_sig('lr', dtype=sig.sig_dtype.T1), 4638 sig.make_sig('grad', dtype=sig.sig_dtype.T) 4639 ) 4640 4641 @prim_attr_register 4642 def __init__(self, update_slots=True): 4643 """Initialize ApplyAdagrad.""" 4644 validator.check_value_type("update_slots", update_slots, [bool], self.name) 4645 self.add_prim_attr('side_effect_mem', True) 4646 4647 4648class ApplyAdagradV2(Primitive): 4649 r""" 4650 Updates relevant entries according to the adagradv2 scheme. 4651 4652 .. math:: 4653 \begin{array}{ll} \\ 4654 accum += grad * grad \\ 4655 var -= lr * grad * \frac{1}{\sqrt{accum} + \epsilon} 4656 \end{array} 4657 4658 where :math:`\epsilon` represents `epsilon`. 4659 4660 Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules 4661 to make the data types consistent. 4662 If they have different data types, the lower priority data type will be converted to 4663 the relatively highest priority data type. 
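
    As a plain NumPy sketch of the update rule above (illustrative only, not the operator
    implementation):

        >>> import numpy as np
        >>> var = np.array([[0.6, 0.4], [0.1, 0.5]], dtype=np.float32)
        >>> accum = np.array([[0.6, 0.5], [0.2, 0.6]], dtype=np.float32)
        >>> grad = np.array([[0.3, 0.7], [0.1, 0.8]], dtype=np.float32)
        >>> lr, epsilon = 0.001, 1e-6
        >>> accum += grad * grad
        >>> var -= lr * grad / (np.sqrt(accum) + epsilon)
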
4664 4665 Note: 4666 The difference is that `ApplyAdagradV2` has one more small constant value :math:`\epsilon` than `ApplyAdagrad`. 4667 4668 Args: 4669 epsilon (float): A small value added for numerical stability. 4670 update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` . 4671 4672 Inputs: 4673 - **var** (Parameter) - Variable to be updated. With float16 or float32 data type. 4674 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 4675 - **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`. 4676 - **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or 4677 a scalar tensor with float16 or float32 data type. 4678 - **grad** (Tensor) - A tensor for gradient. The shape must be the same as `var`. 4679 4680 Outputs: 4681 Tuple of 2 Tensors, the updated parameters. 4682 4683 - **var** (Tensor) - The same shape and data type as `var`. 4684 - **accum** (Tensor) - The same shape and data type as `accum`. 4685 4686 Raises: 4687 TypeError: If dtype of `var`, `accum`, `lr` or `grad` is neither float16 nor float32. 4688 TypeError: If `lr` is neither a Number nor a Tensor. 4689 TypeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported. 4690 4691 Supported Platforms: 4692 ``Ascend`` ``GPU`` ``CPU`` 4693 4694 Examples: 4695 >>> import mindspore 4696 >>> import numpy as np 4697 >>> from mindspore import Tensor, nn, ops, Parameter 4698 >>> class Net(nn.Cell): 4699 ... def __init__(self): 4700 ... super(Net, self).__init__() 4701 ... self.apply_adagrad_v2 = ops.ApplyAdagradV2(epsilon=1e-6) 4702 ... self.var = Parameter(Tensor(np.array([[0.6, 0.4], 4703 ... [0.1, 0.5]]).astype(np.float32)), name="var") 4704 ... self.accum = Parameter(Tensor(np.array([[0.6, 0.5], 4705 ... [0.2, 0.6]]).astype(np.float32)), name="accum") 4706 ... def construct(self, lr, grad): 4707 ... out = self.apply_adagrad_v2(self.var, self.accum, lr, grad) 4708 ... return out 4709 ... 
4710 >>> net = Net() 4711 >>> lr = Tensor(0.001, mindspore.float32) 4712 >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32)) 4713 >>> output = net(lr, grad) 4714 >>> print(output) 4715 (Tensor(shape=[2, 2], dtype=Float32, value= 4716 [[ 5.99638879e-01, 3.99296492e-01], 4717 [ 9.97817814e-02, 4.99281585e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 4718 [[ 6.90000057e-01, 9.90000010e-01], 4719 [ 2.10000008e-01, 1.24000001e+00]])) 4720 """ 4721 4722 __mindspore_signature__ = ( 4723 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4724 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4725 sig.make_sig('lr', dtype=sig.sig_dtype.T1), 4726 sig.make_sig('grad', dtype=sig.sig_dtype.T) 4727 ) 4728 4729 @prim_attr_register 4730 def __init__(self, epsilon, update_slots=True): 4731 """Initialize ApplyAdagradV2.""" 4732 validator.check_value_type("epsilon", epsilon, [float], self.name) 4733 validator.check_value_type("update_slots", update_slots, [bool], self.name) 4734 self.add_prim_attr('side_effect_mem', True) 4735 4736 4737class SparseApplyAdagrad(Primitive): 4738 """ 4739 Deprecated 4740 """ 4741 4742 __mindspore_signature__ = ( 4743 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4744 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4745 sig.make_sig('grad', dtype=sig.sig_dtype.T), 4746 sig.make_sig('indices', dtype=sig.sig_dtype.T1) 4747 ) 4748 4749 @deprecated("1.9", "SparseApplyAdagrad", False) 4750 @prim_attr_register 4751 def __init__(self, lr, update_slots=True, use_locking=False): 4752 """Initialize SparseApplyAdagrad.""" 4753 validator.check_is_float(lr, "lr", self.name) 4754 validator.check_value_type("update_slots", update_slots, [bool], self.name) 4755 validator.check_value_type("use_locking", use_locking, [bool], self.name) 4756 self.add_prim_attr('side_effect_mem', True) 4757 4758 4759class SparseApplyAdagradV2(Primitive): 4760 r""" 4761 Updates relevant entries according to the adagrad scheme, one more epsilon attribute than SparseApplyAdagrad. 4762 4763 .. math:: 4764 \begin{array}{ll} \\ 4765 accum += grad * grad \\ 4766 var -= lr * grad * \frac{1}{\sqrt{accum} + \epsilon} 4767 \end{array} 4768 4769 where :math:`\epsilon` represents `epsilon`. 4770 4771 Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules 4772 to make the data types consistent. 4773 If they have different data types, the lower priority data type will be converted to 4774 the relatively highest priority data type. 4775 4776 Args: 4777 lr (float): Learning rate. 4778 epsilon (float): A small value added for numerical stability. 4779 use_locking (bool): If ``True`` , the `var` and `accum` tensors will be protected from being updated. 4780 Default: ``False`` . 4781 update_slots (bool): If ``True`` , the computation logic will be different to `False`. Default: ``True`` . 4782 4783 Inputs: 4784 - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32. 4785 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 4786 - **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`. 4787 - **grad** (Tensor) - Gradients has the same shape as `var` and 4788 :math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1. 4789 - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`. 4790 The type must be int32 and :math:`indices.shape[0] = grad.shape[0]`. 
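
    The row-wise update implied by `indices` can be sketched as follows (a NumPy illustration
    that assumes unique indices; it is not the operator implementation):

        >>> import numpy as np
        >>> var = np.ones((3, 2), dtype=np.float32)
        >>> accum = np.ones((3, 2), dtype=np.float32)
        >>> grad = np.full((2, 2), 0.1, dtype=np.float32)
        >>> indices = np.array([0, 2], dtype=np.int32)
        >>> lr, epsilon = 0.01, 1e-6
        >>> for row, idx in enumerate(indices):
        ...     accum[idx] += grad[row] * grad[row]
        ...     var[idx] -= lr * grad[row] / (np.sqrt(accum[idx]) + epsilon)
        ...
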

    Outputs:
        Tuple of 2 tensors, the updated parameters.

        - **var** (Tensor) - The same shape and data type as `var`.
        - **accum** (Tensor) - The same shape and data type as `accum`.

    Raises:
        TypeError: If neither `lr` nor `epsilon` is a float.
        TypeError: If neither `update_slots` nor `use_locking` is a bool.
        TypeError: If dtype of `var`, `accum` or `grad` is neither float16 nor float32.
        TypeError: If dtype of `indices` is not int32.
        RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, nn, ops, Parameter
        >>> class Net(nn.Cell):
        ...     def __init__(self):
        ...         super(Net, self).__init__()
        ...         self.sparse_apply_adagrad_v2 = ops.SparseApplyAdagradV2(lr=1e-8, epsilon=1e-6)
        ...         self.var = Parameter(Tensor(np.array([[0.2]]).astype(np.float32)), name="var")
        ...         self.accum = Parameter(Tensor(np.array([[0.1]]).astype(np.float32)), name="accum")
        ...
        ...     def construct(self, grad, indices):
        ...         out = self.sparse_apply_adagrad_v2(self.var, self.accum, grad, indices)
        ...         return out
        ...
        >>> net = Net()
        >>> grad = Tensor(np.array([[0.7]]).astype(np.float32))
        >>> indices = Tensor(np.array([0]), mindspore.int32)
        >>> output = net(grad, indices)
        >>> print(output)
        (Tensor(shape=[1, 1], dtype=Float32, value=
        [[ 1.99999988e-01]]), Tensor(shape=[1, 1], dtype=Float32, value=
        [[ 5.89999974e-01]]))
    """

    __mindspore_signature__ = (
        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('grad', dtype=sig.sig_dtype.T),
        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
    )

    @prim_attr_register
    def __init__(self, lr, epsilon, use_locking=False, update_slots=True):
        """Initialize SparseApplyAdagradV2."""
        self.lr = validator.check_value_type("lr", lr, [float], self.name)
        self.epsilon = validator.check_value_type("epsilon", epsilon, [float], self.name)
        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
        self.update_slots = validator.check_value_type("update_slots", update_slots, [bool], self.name)
        self.add_prim_attr('side_effect_mem', True)


class ApplyProximalAdagrad(Primitive):
    r"""
    Updates relevant entries according to the proximal adagrad algorithm.
    The proximal adagrad algorithm was proposed in `Efficient Learning using Forward-Backward Splitting
    <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_.

    .. math::
        \begin{array}{ll} \\
            accum += grad * grad \\
            \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}} \\
            var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
        \end{array}

    Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
    to make the data types consistent.
    If they have different data types, the lower priority data type will be converted to
    the relatively highest priority data type.

    Args:
        use_locking (bool): If ``True`` , the var and accumulation tensors will be protected from being updated.
            Default: ``False`` .
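
    A NumPy sketch of the proximal update above (illustrative only, not the operator
    implementation):

        >>> import numpy as np
        >>> var = np.array([[0.6, 0.4], [0.1, 0.5]], dtype=np.float32)
        >>> accum = np.array([[0.6, 0.5], [0.2, 0.6]], dtype=np.float32)
        >>> grad = np.array([[0.3, 0.7], [0.1, 0.8]], dtype=np.float32)
        >>> lr, l1, l2 = 0.01, 0.0, 0.0
        >>> accum += grad * grad
        >>> prox_v = var - lr * grad / np.sqrt(accum)
        >>> var = np.sign(prox_v) / (1 + lr * l2) * np.maximum(np.abs(prox_v) - lr * l1, 0)
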
4871 4872 Inputs: 4873 - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32. 4874 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 4875 - **accum** (Parameter) - Accumulation to be updated, must have the same shape and dtype as `var`. 4876 - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. The data type must be 4877 float16 or float32. 4878 - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a scalar. The data type must be 4879 float16 or float32. 4880 - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a scalar. The data type must be 4881 float16 or float32. 4882 - **grad** (Tensor) - Gradient with the same shape and dtype as `var`. 4883 4884 Outputs: 4885 Tuple of 2 Tensors, the updated parameters. 4886 4887 - **var** (Tensor) - The same shape and data type as `var`. 4888 - **accum** (Tensor) - The same shape and data type as `accum`. 4889 4890 Raises: 4891 TypeError: If `use_blocking` is not a bool. 4892 TypeError: If dtype of `var`, `lr`, `l1` or `l2` is neither float16 nor float32. 4893 TypeError: If `lr`, `l1` or `l2` is neither a Number nor a Tensor. 4894 TypeError: If `grad` is not a Tensor. 4895 TypeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported. 4896 4897 Supported Platforms: 4898 ``Ascend`` ``GPU`` ``CPU`` 4899 4900 Examples: 4901 >>> import numpy as np 4902 >>> from mindspore import Tensor, nn, ops, Parameter 4903 >>> class Net(nn.Cell): 4904 ... def __init__(self): 4905 ... super(Net, self).__init__() 4906 ... self.apply_proximal_adagrad = ops.ApplyProximalAdagrad() 4907 ... self.var = Parameter(Tensor(np.array([[0.6, 0.4], 4908 ... [0.1, 0.5]]).astype(np.float32)), name="var") 4909 ... self.accum = Parameter(Tensor(np.array([[0.6, 0.5], 4910 ... [0.2, 0.6]]).astype(np.float32)), name="accum") 4911 ... self.lr = 0.01 4912 ... self.l1 = 0.0 4913 ... self.l2 = 0.0 4914 ... def construct(self, grad): 4915 ... out = self.apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1, self.l2, grad) 4916 ... return out 4917 ... 4918 >>> net = Net() 4919 >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32)) 4920 >>> output = net(grad) 4921 >>> print(output) 4922 (Tensor(shape=[2, 2], dtype=Float32, value= 4923 [[ 5.96388459e-01, 3.92964751e-01], 4924 [ 9.78178233e-02, 4.92815793e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 4925 [[ 6.90000057e-01, 9.90000010e-01], 4926 [ 2.10000008e-01, 1.24000001e+00]])) 4927 """ 4928 4929 __mindspore_signature__ = ( 4930 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4931 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 4932 sig.make_sig('lr', dtype=sig.sig_dtype.T1), 4933 sig.make_sig('l1', dtype=sig.sig_dtype.T2), 4934 sig.make_sig('l2', dtype=sig.sig_dtype.T3), 4935 sig.make_sig('grad', dtype=sig.sig_dtype.T) 4936 ) 4937 4938 @prim_attr_register 4939 def __init__(self, use_locking=False): 4940 """Initialize ApplyProximalAdagrad.""" 4941 self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad'], 4942 outputs=['var', 'accum']) 4943 self.add_prim_attr('side_effect_mem', True) 4944 self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) 4945 4946 4947class SparseApplyProximalAdagrad(Primitive): 4948 r""" 4949 Updates relevant entries according to the proximal adagrad algorithm. 
4950 Compared with :class:`mindspore.ops.ApplyProximalAdagrad`, 4951 an additional index tensor is input. 4952 4953 .. math:: 4954 \begin{array}{ll} \\ 4955 accum += grad * grad \\ 4956 \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}} \\ 4957 var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0) 4958 \end{array} 4959 4960 Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules 4961 to make the data types consistent. 4962 If they have different data types, the lower priority data type will be converted to 4963 the relatively highest priority data type. 4964 4965 Args: 4966 use_locking (bool): If ``True`` , the `var` and `accum` tensors will be protected from being updated. 4967 Default: ``False`` . 4968 4969 Inputs: 4970 - **var** (Parameter) - Variable tensor to be updated. The data type must be float16 or float32. 4971 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 4972 - **accum** (Parameter) - Variable tensor to be updated, has the same shape as `var`. 4973 - **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or 4974 a scalar tensor with float16 or float32 data type. It must be positive. 4975 - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a float number or 4976 a scalar tensor with float16 or float32 data type. It must be non-negative. 4977 - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a float number or 4978 a scalar tensor with float16 or float32 data type. It must be non-negative. 4979 - **grad** (Tensor) - A tensor must meet with 4980 :math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1. 4981 - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`. 4982 If there are duplicates in `indices`, the behavior is undefined. Must be one of the 4983 following types: int32, int64 and :math:`indices.shape[0] = grad.shape[0]`. 4984 4985 Outputs: 4986 Tuple of 2 tensors, the updated parameters. 4987 4988 - **var** (Tensor) - The same shape and data type as `var`. 4989 - **accum** (Tensor) - The same shape and data type as `accum`. 4990 4991 Raises: 4992 TypeError: If `use_locking` is not a bool. 4993 TypeError: If dtype of `var`, `accum`, `lr`, `l1`, `l2` or `grad` is neither float16 nor float32. 4994 TypeError: If dtype of `indices` is neither int32 nor int64. 4995 ValueError: If `lr` <= 0 or `l1` < 0 or `l2` < 0. 4996 RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported. 4997 4998 Supported Platforms: 4999 ``Ascend`` ``GPU`` 5000 5001 Examples: 5002 >>> import numpy as np 5003 >>> from mindspore import Tensor, nn, ops, Parameter 5004 >>> class Net(nn.Cell): 5005 ... def __init__(self): 5006 ... super(Net, self).__init__() 5007 ... self.sparse_apply_proximal_adagrad = ops.SparseApplyProximalAdagrad() 5008 ... self.var = Parameter(Tensor(np.array([[4.1, 7.2], [1.1, 3.0]], np.float32)), name="var") 5009 ... self.accum = Parameter(Tensor(np.array([[0, 0], [0, 0]], np.float32)), name="accum") 5010 ... self.lr = 1.0 5011 ... self.l1 = 1.0 5012 ... self.l2 = 0.0 5013 ... def construct(self, grad, indices): 5014 ... out = self.sparse_apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1, 5015 ... self.l2, grad, indices) 5016 ... return out 5017 ... 
5018 >>> net = Net() 5019 >>> grad = Tensor(np.array([[1, 1], [1, 1]], np.float32)) 5020 >>> indices = Tensor(np.array([0, 1], np.int32)) 5021 >>> output = net(grad, indices) 5022 >>> print(output) 5023 (Tensor(shape=[2, 2], dtype=Float32, value= 5024 [[ 2.09999990e+00, 5.19999981e+00], 5025 [ 0.00000000e+00, 1.00000000e+00]]), Tensor(shape=[2, 2], dtype=Float32, value= 5026 [[ 1.00000000e+00, 1.00000000e+00], 5027 [ 1.00000000e+00, 1.00000000e+00]])) 5028 """ 5029 5030 __mindspore_signature__ = ( 5031 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5032 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5033 sig.make_sig('lr', dtype=sig.sig_dtype.T1), 5034 sig.make_sig('l1', dtype=sig.sig_dtype.T2), 5035 sig.make_sig('l2', dtype=sig.sig_dtype.T3), 5036 sig.make_sig('grad', dtype=sig.sig_dtype.T), 5037 sig.make_sig('indices', dtype=sig.sig_dtype.T4) 5038 ) 5039 5040 @prim_attr_register 5041 def __init__(self, use_locking=False): 5042 """Initialize SparseApplyProximalAdagrad.""" 5043 self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad', 'indices'], 5044 outputs=['var', 'accum']) 5045 self.add_prim_attr('side_effect_mem', True) 5046 self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) 5047 5048 5049class ApplyAddSign(Primitive): 5050 r""" 5051 Updates relevant entries according to the AddSign algorithm. 5052 5053 .. math:: 5054 \begin{array}{ll} \\ 5055 m_{t+1} = \beta * m_{t} + (1 - \beta) * g \\ 5056 \text{update} = (\alpha + \text{sign_decay} * sign(g) * sign(m)) * g \\ 5057 var = var - lr_{t+1} * \text{update} 5058 \end{array} 5059 5060 :math:`t` represents updating step while :math:`m` represents the 1st moment vector, :math:`m_{t}` 5061 is the last moment of :math:`m_{t+1}`, :math:`lr` represents scaling factor `lr`, :math:`g` represents `grad`, 5062 :math:`\alpha` represents `alpha`, :math:`\beta` represents `beta`. 5063 5064 The data type of all inputs must be float16 or float32 on Ascend and float16, float32 or float64 on CPU and GPU. 5065 5066 Inputs of `var`, `accum` and `grad` , `sign_decay` and `beta` comply with the implicit type conversion rules 5067 to make the data types consistent. 5068 If they have different data types, the lower priority data type will be converted to 5069 the relatively highest priority data type. 5070 5071 Inputs: 5072 - **var** (Parameter) - Variable tensor to be updated. 5073 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 5074 - **m** (Parameter) - Variable tensor to be updated, has the same data type as `var`. 5075 - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. 5076 - **alpha** (Union[Number, Tensor]) - Must be a scalar. 5077 - **sign_decay** (Union[Number, Tensor]) - Must be a scalar. 5078 - **beta** (Union[Number, Tensor]) - The exponential decay rate, must be a scalar. 5079 - **grad** (Tensor) - A tensor of the same shape as `var`, for the gradient. 5080 5081 Outputs: 5082 Tuple of 2 Tensors, the updated parameters. 5083 5084 - **var** (Tensor) - The same shape and data type as `var`. 5085 - **m** (Tensor) - The same shape and data type as `m`. 5086 5087 Raises: 5088 TypeError: If dtype of `var`, `lr` and `alpha` is not float16, float32 or float64. 5089 TypeError: If dtype of `sign_decay` and `beta` are both not float16, float32 or float64. 5090 TypeError: If `lr`, `alpha` or `sign_decay` is neither a Number nor a Tensor. 5091 TypeError: If `grad` is not a Tensor. 
5092 TypeError: If the data type of `var`, `m` and `grad` conversion of Parameter is not supported.
5093
5094 Supported Platforms:
5095 ``Ascend`` ``GPU`` ``CPU``
5096
5097 Examples:
5098 >>> import numpy as np
5099 >>> from mindspore import Tensor, nn, ops, Parameter
5100 >>> class Net(nn.Cell):
5101 ... def __init__(self):
5102 ... super(Net, self).__init__()
5103 ... self.apply_add_sign = ops.ApplyAddSign()
5104 ... self.var = Parameter(Tensor(np.array([[0.6, 0.4],
5105 ... [0.1, 0.5]]).astype(np.float32)), name="var")
5106 ... self.m = Parameter(Tensor(np.array([[0.6, 0.5],
5107 ... [0.2, 0.6]]).astype(np.float32)), name="m")
5108 ... self.lr = 0.001
5109 ... self.alpha = 1.0
5110 ... self.sign_decay = 0.99
5111 ... self.beta = 0.9
5112 ... def construct(self, grad):
5113 ... out = self.apply_add_sign(self.var, self.m, self.lr, self.alpha, self.sign_decay, self.beta, grad)
5114 ... return out
5115 ...
5116 >>> net = Net()
5117 >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
5118 >>> output = net(grad)
5119 >>> print(output)
5120 (Tensor(shape=[2, 2], dtype=Float32, value=
5121 [[ 5.99403024e-01, 3.98607016e-01],
5122 [ 9.98010039e-02, 4.98407990e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
5123 [[ 5.70000052e-01, 5.19999981e-01],
5124 [ 1.89999998e-01, 6.20000064e-01]]))
5125 """
5126
5127 __mindspore_signature__ = (
5128 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5129 sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5130 sig.make_sig('lr', dtype=sig.sig_dtype.T1),
5131 sig.make_sig('alpha', dtype=sig.sig_dtype.T2),
5132 sig.make_sig('sign_decay', dtype=sig.sig_dtype.T3),
5133 sig.make_sig('beta', dtype=sig.sig_dtype.T3),
5134 sig.make_sig('grad', dtype=sig.sig_dtype.T)
5135 )
5136
5137 @prim_attr_register
5138 def __init__(self):
5139 """Initialize ApplyAddSign."""
5140 self.add_prim_attr('side_effect_mem', True)
5141
5142
5143class ApplyPowerSign(Primitive):
5144 r"""
5145 Updates relevant entries according to the PowerSign algorithm.
5146
5147 The PowerSign algorithm was proposed in `Neural Optimizer Search with Reinforcement Learning
5148 <https://arxiv.org/abs/1709.07417>`_.
5149
5150 .. math::
5151 \begin{array}{ll} \\
5152 m_{t+1} = \beta * m_{t} + (1 - \beta) * g \\
5153 \text{update} = \exp(\text{logbase} * \text{sign_decay} * sign(g) * sign(m)) * g \\
5154 var = var - lr_{t+1} * \text{update}
5155 \end{array}
5156
5157 :math:`t` represents updating step while :math:`m` represents the 1st moment vector, :math:`m_{t}`
5158 is the last moment of :math:`m_{t+1}`, :math:`lr` represents scaling factor `lr`, :math:`g` represents `grad`,
5159 :math:`\beta` represents `beta`.
5160
5161 All of inputs comply with the implicit type conversion rules to make the data types consistent.
5162 If `lr`, `logbase`, `sign_decay` or `beta` is a number, the number is automatically converted to Tensor,
5163 and the data type is consistent with the Tensor data type involved in the operation.
5164 If inputs are tensors and have different data types, the lower priority data type will be converted to
5165 the relatively highest priority data type.
5166
5167 Note:
5168 On Ascend, input data type of float64 is currently not supported.
5169
5170 Inputs:
5171 - **var** (Parameter) - Variable tensor to be updated. With float64, float32 or float16 data type.
5172 If data type of `var` is float16, all inputs must have the same data type as `var`.
5173 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
5174 - **m** (Parameter) - Variable tensor to be updated, has the same shape as `var`. 5175 - **lr** (Union[Number, Tensor]) - The learning rate value, should be a scalar or Tensor 5176 with float64, float32 or float16 data type. 5177 - **logbase** (Union[Number, Tensor]) - Should be a scalar or Tensor with float64, float32 or float16 data type. 5178 - **sign_decay** (Union[Number, Tensor]) - Should be a scalar or Tensor with float64, float32 or 5179 float16 data type. 5180 - **beta** (Union[Number, Tensor]) - The exponential decay rate, should be a scalar or Tensor 5181 with float64, float32 or float16 data type. 5182 - **grad** (Tensor) - A tensor of the same shape as `var`, for the gradient. 5183 5184 Outputs: 5185 Tuple of 2 Tensors, the updated parameters. 5186 5187 - **var** (Tensor) - The same shape and data type as `var`. 5188 - **m** (Tensor) - The same shape and data type as `m`. 5189 5190 Raises: 5191 TypeError: If dtype of `var`, `lr`, `logbase`, `sign_decay`, `beta` or `grad` is not one of float16, 5192 float32 or float64. 5193 TypeError: If `lr`, `logbase`, `sign_decay` or `beta` is neither a Number nor a Tensor. 5194 TypeError: If `grad` is not a Tensor. 5195 TypeError: If the data type of `lr`, `logbase`, `sign_decay` and `grad` conversion of Parameter 5196 is not supported. 5197 5198 Supported Platforms: 5199 ``Ascend`` ``GPU`` ``CPU`` 5200 5201 Examples: 5202 >>> import numpy as np 5203 >>> from mindspore import Tensor, nn, ops, Parameter 5204 >>> class Net(nn.Cell): 5205 ... def __init__(self): 5206 ... super(Net, self).__init__() 5207 ... self.apply_power_sign = ops.ApplyPowerSign() 5208 ... self.var = Parameter(Tensor(np.array([[0.6, 0.4], 5209 ... [0.1, 0.5]]).astype(np.float32)), name="var") 5210 ... self.m = Parameter(Tensor(np.array([[0.6, 0.5], 5211 ... [0.2, 0.6]]).astype(np.float32)), name="m") 5212 ... self.lr = 0.001 5213 ... self.logbase = np.e 5214 ... self.sign_decay = 0.99 5215 ... self.beta = 0.9 5216 ... def construct(self, grad): 5217 ... out = self.apply_power_sign(self.var, self.m, self.lr, self.logbase, 5218 ... self.sign_decay, self.beta, grad) 5219 ... return out 5220 ... 5221 >>> net = Net() 5222 >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32)) 5223 >>> output = net(grad) 5224 >>> print(output) 5225 (Tensor(shape=[2, 2], dtype=Float32, value= 5226 [[ 5.95575690e-01, 3.89676481e-01], 5227 [ 9.85252112e-02, 4.88201708e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 5228 [[ 5.70000052e-01, 5.19999981e-01], 5229 [ 1.89999998e-01, 6.20000064e-01]])) 5230 """ 5231 5232 __mindspore_signature__ = ( 5233 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5234 sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5235 sig.make_sig('lr', dtype=sig.sig_dtype.T), 5236 sig.make_sig('logbase', dtype=sig.sig_dtype.T), 5237 sig.make_sig('sign_decay', dtype=sig.sig_dtype.T), 5238 sig.make_sig('beta', dtype=sig.sig_dtype.T), 5239 sig.make_sig('grad', dtype=sig.sig_dtype.T) 5240 ) 5241 5242 @prim_attr_register 5243 def __init__(self): 5244 """Initialize ApplyPowerSign.""" 5245 self.add_prim_attr('side_effect_mem', True) 5246 5247 5248class ApplyGradientDescent(Primitive): 5249 r""" 5250 Updates `var` by subtracting `alpha` * `delta` from it. 5251 5252 .. math:: 5253 var = var - \alpha * \delta 5254 5255 where :math:`\alpha` represents `alpha`, :math:`\delta` represents `delta`. 5256 5257 Inputs of `var` and `delta` comply with the implicit type conversion rules to make the data types consistent. 
5258 If they have different data types, the lower priority data type will be converted to 5259 the relatively highest priority data type. 5260 5261 Inputs: 5262 - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type. 5263 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 5264 - **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type. 5265 - **delta** (Tensor) - A tensor for the change, has the same shape as `var`. 5266 5267 Outputs: 5268 Tensor, represents the updated `var`. 5269 5270 Raises: 5271 TypeError: If dtype of `var` or `alpha` is neither float16 nor float32. 5272 TypeError: If `delta` is not a Tensor. 5273 TypeError: If `alpha` is neither a Number nor a Tensor. 5274 TypeError: If the data type of `var` and `delta` conversion of Parameter is not supported. 5275 5276 Supported Platforms: 5277 ``Ascend`` ``GPU`` ``CPU`` 5278 5279 Examples: 5280 >>> import numpy as np 5281 >>> from mindspore import Tensor, nn, ops, Parameter 5282 >>> class Net(nn.Cell): 5283 ... def __init__(self): 5284 ... super(Net, self).__init__() 5285 ... self.apply_gradient_descent = ops.ApplyGradientDescent() 5286 ... self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var") 5287 ... self.alpha = 0.001 5288 ... def construct(self, delta): 5289 ... out = self.apply_gradient_descent(self.var, self.alpha, delta) 5290 ... return out 5291 ... 5292 >>> net = Net() 5293 >>> delta = Tensor(np.array([[0.1, 0.1], [0.1, 0.1]]).astype(np.float32)) 5294 >>> output = net(delta) 5295 >>> print(output) 5296 [[0.9999 0.9999] 5297 [0.9999 0.9999]] 5298 """ 5299 5300 __mindspore_signature__ = ( 5301 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5302 sig.make_sig('alpha', dtype=sig.sig_dtype.T1), 5303 sig.make_sig('delta', dtype=sig.sig_dtype.T) 5304 ) 5305 5306 @prim_attr_register 5307 def __init__(self): 5308 """Initialize ApplyGradientDescent.""" 5309 self.add_prim_attr('side_effect_mem', True) 5310 5311 5312class ApplyProximalGradientDescent(Primitive): 5313 r""" 5314 Updates relevant entries according to the FOBOS(Forward Backward Splitting) algorithm. 5315 Refer to the paper `Efficient Learning using Forward-Backward Splitting 5316 <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_ for more details. 5317 5318 .. math:: 5319 \begin{array}{ll} \\ 5320 \text{prox_v} = var - \alpha * \delta \\ 5321 var = \frac{sign(\text{prox_v})}{1 + \alpha * l2} * \max(\left| \text{prox_v} \right| - \alpha * l1, 0) 5322 \end{array} 5323 5324 where :math:`\alpha` represents `alpha`, :math:`\delta` represents `delta`. 5325 5326 Inputs of `var` and `delta` comply with the implicit type conversion rules to make the data types consistent. 5327 If they have different data types, the lower priority data type will be converted to 5328 the relatively highest priority data type. 5329 5330 Inputs: 5331 - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type. 5332 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 5333 - **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type. 5334 - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a scalar. 5335 With float32 or float16 data type. 5336 - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a scalar. 5337 With float32 or float16 data type. 
5338 - **delta** (Tensor) - A tensor for the change.
5339
5340 Outputs:
5341 Tensor, represents the updated `var`.
5342
5343 Raises:
5344 TypeError: If dtype of `var`, `alpha`, `l1` or `l2` is neither float16 nor float32.
5345 TypeError: If `alpha`, `l1` or `l2` is neither a Number nor a Tensor.
5346 TypeError: If `delta` is not a Tensor.
5347 TypeError: If the data type of `var` and `delta` conversion of Parameter is not supported.
5348
5349 Supported Platforms:
5350 ``Ascend`` ``GPU`` ``CPU``
5351
5352 Examples:
5353 >>> import numpy as np
5354 >>> from mindspore import Tensor, nn, ops, Parameter
5355 >>> class Net(nn.Cell):
5356 ... def __init__(self):
5357 ... super(Net, self).__init__()
5358 ... self.apply_proximal_gradient_descent = ops.ApplyProximalGradientDescent()
5359 ... self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var")
5360 ... self.alpha = 0.001
5361 ... self.l1 = 0.1
5362 ... self.l2 = 0.1
5363 ... def construct(self, delta):
5364 ... out = self.apply_proximal_gradient_descent(self.var, self.alpha, self.l1, self.l2, delta)
5365 ... return out
5366 ...
5367 >>> net = Net()
5368 >>> delta = Tensor(np.array([[0.1, 0.1], [0.1, 0.1]]).astype(np.float32))
5369 >>> output = net(delta)
5370 >>> print(output)
5371 [[0.99969995 0.99969995]
5372 [0.99969995 0.99969995]]
5373 """
5374
5375 __mindspore_signature__ = (
5376 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5377 sig.make_sig('alpha', dtype=sig.sig_dtype.T1),
5378 sig.make_sig('l1', dtype=sig.sig_dtype.T2),
5379 sig.make_sig('l2', dtype=sig.sig_dtype.T3),
5380 sig.make_sig('delta', dtype=sig.sig_dtype.T)
5381 )
5382
5383 @prim_attr_register
5384 def __init__(self):
5385 """Initialize ApplyProximalGradientDescent."""
5386 self.add_prim_attr('side_effect_mem', True)
5387
5388
5389class LARSUpdate(PrimitiveWithInfer):
5390 """
5391 Conducts the LARS (layer-wise adaptive rate scaling) update on the gradient, using the sums of squares
5392 of the weight and the gradient.
5393
5393 For more details, please refer to :class:`mindspore.nn.LARS`.
5394
5395 Args:
5396 epsilon (float, optional): Term added to the denominator to improve numerical stability.
5397 Default: ``1e-05`` .
5398 hyperpara (float, optional): Trust coefficient for calculating the local learning rate.
5399 Default: ``0.001`` .
5400 use_clip (bool, optional): Whether to use clip operation for calculating the local learning rate.
5401 Default: ``False`` .
5402
5403 Inputs:
5404 - **weight** (Tensor) - A tensor, representing the weight.
5405 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
5406 - **gradient** (Tensor) - The gradient of weight, which has the same shape and dtype as `weight`.
5407 - **norm_weight** (Tensor) - A scalar tensor, representing the sum of squares of weight.
5408 - **norm_gradient** (Tensor) - A scalar tensor, representing the sum of squares of gradient.
5409 - **weight_decay** (Union[Number, Tensor]) - Weight decay. It must be a scalar tensor or number.
5410 - **learning_rate** (Union[Number, Tensor]) - Learning rate. It must be a scalar tensor or number.
5411
5412 Outputs:
5413 Tensor, represents the new gradient.
5414
5415 Raises:
5416 TypeError: If `epsilon` or `hyperpara` is not a float.
5417 TypeError: If `use_clip` is not a bool.
5418 TypeError: If `weight`, `gradient`, `norm_weight` or `norm_gradient` is not a Tensor.
5419 TypeError: If `weight_decay` or `learning_rate` is neither a Number nor a Tensor.
5420 TypeError: If shape of `gradient` is not the same as `weight`.
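The local learning rate logic can be sketched in NumPy as follows. This is an illustrative
approximation rather than the exact kernel: how `weight_decay`, `learning_rate` and `epsilon`
enter the update is assumed here (both are neutral in the example below, where `weight_decay`
is 0.0 and `learning_rate` is 1.0), and `use_clip` is taken to be ``False``. With those
assumptions the sketch approximately reproduces the values printed in the example below.

>>> import numpy as np
>>> def lars_update_sketch(weight, gradient, norm_weight, norm_gradient,
...                        weight_decay, learning_rate, hyperpara=0.001, epsilon=1e-05):
...     # norm_weight and norm_gradient are the sums of squares passed in as inputs
...     w_norm = np.sqrt(norm_weight)
...     g_norm = np.sqrt(norm_gradient)
...     # assumed form of the local learning rate (trust ratio)
...     trust = hyperpara * w_norm / (g_norm + weight_decay * w_norm + epsilon)
...     # assumed composition with learning_rate and weight_decay
...     return learning_rate * trust * (gradient + weight_decay * weight)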
5421 5422 Supported Platforms: 5423 ``Ascend`` 5424 5425 Examples: 5426 >>> import numpy as np 5427 >>> from mindspore import Tensor, nn, ops 5428 >>> class Net(nn.Cell): 5429 ... def __init__(self): 5430 ... super(Net, self).__init__() 5431 ... self.lars = ops.LARSUpdate() 5432 ... self.reduce = ops.ReduceSum() 5433 ... self.square = ops.Square() 5434 ... def construct(self, weight, gradient): 5435 ... w_square_sum = self.reduce(self.square(weight)) 5436 ... grad_square_sum = self.reduce(self.square(gradient)) 5437 ... grad_t = self.lars(weight, gradient, w_square_sum, grad_square_sum, 0.0, 1.0) 5438 ... return grad_t 5439 ... 5440 >>> weight = Tensor(np.array([[0.5, 0.8, 0.2], [0.6, 0.4, 0.2]]).astype(np.float32)) 5441 >>> gradient = Tensor(np.array([[0.4, 0.4, 0.5], [0.2, 0.4, 0.3]]).astype(np.float32)) 5442 >>> net = Net() 5443 >>> output = net(Tensor(weight), Tensor(gradient)) 5444 >>> print(output) 5445 [[0.0005265 0.0005265 0.00065813] 5446 [0.00026325 0.0005265 0.00039488]] 5447 """ 5448 5449 @prim_attr_register 5450 def __init__(self, epsilon=1e-05, hyperpara=0.001, use_clip=False): 5451 """Initialize LARSUpdate.""" 5452 validator.check_value_type("epsilon", epsilon, [float], self.name) 5453 validator.check_value_type("hyperpara", hyperpara, [float], self.name) 5454 validator.check_value_type("use_clip", use_clip, [bool], self.name) 5455 5456 5457class ApplyFtrl(Primitive): 5458 """ 5459 Updates relevant entries according to the FTRL scheme. 5460 5461 For more details, please refer to :class:`mindspore.nn.FTRL`. 5462 5463 Note: 5464 - Currently, only positive numbers are supported on the Ascend platform, 5465 and the calculation results for other scenarios are not defined. 5466 - Inputs of `var`, `accum`, `linear` and `grad` comply with the implicit type conversion rules 5467 to make the data types consistent. 5468 If they have different data types, the lower priority data type will be converted to 5469 the relatively highest priority data type. 5470 5471 Args: 5472 use_locking (bool): Use locks for updating operation if ``True`` . Default: ``False`` . 5473 5474 Inputs: 5475 - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32. 5476 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 5477 - **accum** (Parameter) - The accumulation to be updated, must be same shape as `var`. 5478 - **linear** (Parameter) - The linear coefficient to be updated, must be same shape as `var`. 5479 - **grad** (Tensor) - Gradient. The data type must be float16 or float32. 5480 - **lr** (Union[Number, Tensor]) - The learning rate value, must be positive. Default: ``0.001`` . 5481 It must be a float number or a scalar tensor with float16 or float32 data type. 5482 - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be greater than or equal to zero. 5483 Default: ``0.0`` . It must be a float number or a scalar tensor with float16 or float32 data type. 5484 - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be greater than or equal to zero. 5485 Default: ``0.0`` . It must be a float number or a scalar tensor with float16 or float32 data type. 5486 - **lr_power** (Union[Number, Tensor]) - Learning rate power controls how the learning rate decreases 5487 during training, must be less than or equal to zero. Use fixed learning rate if lr_power is zero. 5488 Default: ``-0.5`` . It must be a float number or a scalar tensor with float16 or float32 data type. 
5489 5490 Outputs: 5491 - **var** (Tensor) - Represents the updated `var`. As the input parameters has been updated in-place, this 5492 value is always zero when the platform is GPU. 5493 5494 Raises: 5495 TypeError: If `use_locking` is not a bool. 5496 TypeError: If dtype of `var`, `grad`, `lr`, `l1`, `l2` or `lr_power` is neither float16 nor float32. 5497 TypeError: If `lr`, `l1`, `l2` or `lr_power` is neither a Number nor a Tensor. 5498 TypeError: If `grad` is not a Tensor. 5499 TypeError: If the parameter types of `var`, `accum` and `linear` are inconsistent. 5500 TypeError: If the parameter types of `grad`, `lr`, `l1`, `l2`, `lr_power` are inconsistent with `var` 5501 and the precision is greater than `var`. 5502 5503 Supported Platforms: 5504 ``Ascend`` ``GPU`` ``CPU`` 5505 5506 Examples: 5507 >>> import numpy as np 5508 >>> from mindspore import Tensor, nn, ops, Parameter 5509 >>> class ApplyFtrlNet(nn.Cell): 5510 ... def __init__(self): 5511 ... super(ApplyFtrlNet, self).__init__() 5512 ... self.apply_ftrl = ops.ApplyFtrl() 5513 ... self.lr = 0.001 5514 ... self.l1 = 0.0 5515 ... self.l2 = 0.0 5516 ... self.lr_power = -0.5 5517 ... self.var = Parameter(Tensor(np.array([[0.6, 0.4], 5518 ... [0.1, 0.5]]).astype(np.float32)), name="var") 5519 ... self.accum = Parameter(Tensor(np.array([[0.6, 0.5], 5520 ... [0.2, 0.6]]).astype(np.float32)), name="accum") 5521 ... self.linear = Parameter(Tensor(np.array([[0.9, 0.1], 5522 ... [0.7, 0.8]]).astype(np.float32)), name="linear") 5523 ... 5524 ... def construct(self, grad): 5525 ... out = self.apply_ftrl(self.var, self.accum, self.linear, grad, self.lr, self.l1, self.l2, 5526 ... self.lr_power) 5527 ... return out 5528 ... 5529 >>> net = ApplyFtrlNet() 5530 >>> input_x = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32)) 5531 >>> output = net(input_x) 5532 >>> print(net.var.asnumpy()) 5533 [[ 0.0390525 0.11492836] 5534 [ 0.00066425 0.15075898]] 5535 """ 5536 5537 __mindspore_signature__ = ( 5538 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5539 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5540 sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5541 sig.make_sig('grad', dtype=sig.sig_dtype.T), 5542 sig.make_sig('lr', dtype=sig.sig_dtype.T), 5543 sig.make_sig('l1', dtype=sig.sig_dtype.T), 5544 sig.make_sig('l2', dtype=sig.sig_dtype.T), 5545 sig.make_sig('lr_power', dtype=sig.sig_dtype.T) 5546 ) 5547 5548 @prim_attr_register 5549 def __init__(self, use_locking=False): 5550 """Initialize ApplyFtrl.""" 5551 self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'lr', 'l1', 'l2', 'lr_power'], 5552 outputs=['output']) 5553 self.add_prim_attr('side_effect_mem', True) 5554 self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) 5555 5556 5557class SparseApplyFtrl(Primitive): 5558 """ 5559 Updates relevant entries according to the FTRL-proximal scheme 5560 For more details, please refer to :class:`mindspore.nn.FTRL`. 5561 5562 All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent. 5563 If they have different data types, the lower priority data type will be converted to 5564 the relatively highest priority data type. 5565 5566 Args: 5567 lr (float): The learning rate value, must be positive. 5568 l1 (float): l1 regularization strength, must be greater than or equal to zero. 5569 l2 (float): l2 regularization strength, must be greater than or equal to zero. 
5570 lr_power (float): Learning rate power controls how the learning rate decreases during training, 5571 must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero. 5572 use_locking (bool, optional): Use locks for updating operation if ``True`` . Default: ``False`` . 5573 5574 Inputs: 5575 - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32. 5576 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 5577 - **accum** (Parameter) - The accumulation to be updated, must be same shape as `var`. 5578 - **linear** (Parameter) - The linear coefficient to be updated, must be the same shape as `var`. 5579 - **grad** (Tensor) - A tensor must meet with :math:`grad.shape[1:] = var.shape[1:]` 5580 if var.shape > 1. 5581 - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`. 5582 If there are duplicates in `indices`, the behavior is undefined. 5583 The type must be int32 or int64 and :math:`indices.shape[0] = grad.shape[0]`. 5584 5585 Outputs: 5586 - **var** (Tensor) - Tensor, has the same shape and data type as `var`. 5587 - **accum** (Tensor) - Tensor, has the same shape and data type as `accum`. 5588 - **linear** (Tensor) - Tensor, has the same shape and data type as `linear`. 5589 5590 Raises: 5591 TypeError: If `lr`, `l1`, `l2` or `lr_power` is not a float. 5592 TypeError: If `use_locking` is not a bool. 5593 TypeError: If dtype of `var`, `accum`, `linear` or `grad` is neither float16 nor float32. 5594 TypeError: If dtype of `indices` is neither int32 nor int64. 5595 RuntimeError: If the data type of all of inputs except `indices` conversion of Parameter is not supported. 5596 5597 Supported Platforms: 5598 ``Ascend`` ``GPU`` ``CPU`` 5599 5600 Examples: 5601 >>> import mindspore 5602 >>> import numpy as np 5603 >>> from mindspore import Tensor, nn, Parameter, ops 5604 >>> class SparseApplyFtrlNet(nn.Cell): 5605 ... def __init__(self): 5606 ... super(SparseApplyFtrlNet, self).__init__() 5607 ... self.sparse_apply_ftrl = ops.SparseApplyFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5) 5608 ... self.var = Parameter(Tensor(np.array([[0.2]]).astype(np.float32)), name="var") 5609 ... self.accum = Parameter(Tensor(np.array([[0.1]]).astype(np.float32)), name="accum") 5610 ... self.linear = Parameter(Tensor(np.array([[0.6]]).astype(np.float32)), name="linear") 5611 ... 5612 ... def construct(self, grad, indices): 5613 ... out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices) 5614 ... return out 5615 ... 
5616 >>> net = SparseApplyFtrlNet() 5617 >>> grad = Tensor(np.array([[0.7]]).astype(np.float32)) 5618 >>> indices = Tensor(np.ones([1]), mindspore.int32) 5619 >>> output = net(grad, indices) 5620 >>> print(output) 5621 (Tensor(shape=[1, 1], dtype=Float32, value= 5622 [[2.00000003e-01]]), Tensor(shape=[1, 1], dtype=Float32, value= 5623 [[1.00000001e-01]]), Tensor(shape=[1, 1], dtype=Float32, value= 5624 [[6.00000024e-01]])) 5625 """ 5626 5627 __mindspore_signature__ = ( 5628 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5629 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5630 sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5631 sig.make_sig('grad', dtype=sig.sig_dtype.T), 5632 sig.make_sig('indices', dtype=sig.sig_dtype.T1) 5633 ) 5634 5635 @prim_attr_register 5636 def __init__(self, lr, l1, l2, lr_power, use_locking=False): 5637 """Initialize SparseApplyFtrl.""" 5638 validator.check_value_type("lr", lr, [float], self.name) 5639 validator.check_value_type("l1", l1, [float], self.name) 5640 validator.check_value_type("l2", l2, [float], self.name) 5641 validator.check_value_type("lr_power", lr_power, [float], self.name) 5642 self.lr = validator.check_positive_float(lr, "lr", self.name) 5643 self.l1 = validator.check_non_negative_float(l1, "l1", self.name) 5644 self.l2 = validator.check_non_negative_float(l2, "l2", self.name) 5645 self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name) 5646 self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) 5647 self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'indices'], 5648 outputs=['var', 'accum', 'linear']) 5649 self.add_prim_attr('side_effect_mem', True) 5650 5651 5652class SparseApplyFtrlV2(PrimitiveWithInfer): 5653 """ 5654 The SparseApplyFtrlV2 interface is deprecated, please use the :class:`mindspore.ops.SparseApplyFtrl` instead. 
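A minimal migration sketch (hypothetical hyperparameter values; note that
:class:`mindspore.ops.SparseApplyFtrl` has no `l2_shrinkage` argument, so that term is
simply dropped):

>>> from mindspore import ops
>>> # Deprecated: ops.SparseApplyFtrlV2(lr=0.01, l1=0.0, l2=0.0, l2_shrinkage=0.0, lr_power=-0.5)
>>> sparse_apply_ftrl = ops.SparseApplyFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)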
5655 5656 Supported Platforms: 5657 Deprecated 5658 """ 5659 5660 __mindspore_signature__ = ( 5661 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5662 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5663 sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 5664 sig.make_sig('grad', dtype=sig.sig_dtype.T), 5665 sig.make_sig('indices', dtype=sig.sig_dtype.T1) 5666 ) 5667 5668 @deprecated("2.1", "ops.SparseApplyFtrl", False) 5669 @prim_attr_register 5670 def __init__(self, lr, l1, l2, l2_shrinkage, lr_power, use_locking=False): 5671 """Initialize SparseApplyFtrlV2.""" 5672 validator.check_value_type("lr", lr, [float], self.name) 5673 validator.check_value_type("l1", l1, [float], self.name) 5674 validator.check_value_type("l2", l2, [float], self.name) 5675 validator.check_value_type("lr_power", lr_power, [float], self.name) 5676 self.lr = validator.check_positive_float(lr, "lr", self.name) 5677 self.l1 = validator.check_non_negative_float(l1, "l1", self.name) 5678 self.l2 = validator.check_non_negative_float(l2, "l2", self.name) 5679 self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name) 5680 self.l2_shrinkage = validator.check_value_type("l2_shrinkage", l2_shrinkage, [float], self.name) 5681 self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) 5682 self.add_prim_attr('side_effect_mem', True) 5683 5684 def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape): 5685 validator.check('var shape', var_shape, 'accum shape', accum_shape, validator.EQ, self.name) 5686 validator.check('var shape', var_shape, 'linear shape', linear_shape, validator.EQ, self.name) 5687 if len(var_shape) > 1: 5688 validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], validator.EQ, self.name) 5689 validator.check_int(len(indices_shape), 1, validator.EQ, "indices rank", self.name) 5690 validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], validator.EQ, self.name) 5691 return var_shape, accum_shape, linear_shape 5692 5693 def infer_dtype(self, var_dtype, accum_dtype, linear_dtype, grad_dtype, indices_dtype): 5694 args = {"var_dtype": var_dtype, "accum_dtype": accum_dtype, 5695 "linear_dtype": linear_dtype, "grad_dtype": grad_dtype} 5696 validator.check_tensors_dtypes_same_and_valid(args, [mstype.float16, mstype.float32], self.name) 5697 validator.check_tensor_dtype_valid("indicese", indices_dtype, [mstype.int32], self.name) 5698 return var_dtype, accum_dtype, linear_dtype 5699 5700 5701class Dropout2D(PrimitiveWithInfer): 5702 r""" 5703 During training, randomly zeroes some channels of the input tensor with probability :math:`1-keep\_prob` 5704 from a Bernoulli distribution(For a 4-dimensional tensor with a shape of :math:`(N, C, H, W)`, 5705 the channel feature map refers 5706 to a 2-dimensional feature map with the shape of :math:`(H, W)`). 5707 5708 Dropout2D can improve the independence between channel feature maps. 5709 5710 Note: 5711 The keep probability :math:`keep\_prob` is equal to :math:`1 - p` in :func:`mindspore.ops.dropout2d`. 5712 5713 Args: 5714 keep_prob (float, optional): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8, 5715 means dropping out 20% of channels. Default: ``0.5`` . 
5716 5717 Inputs: 5718 - **x** (Tensor) - A 4-D tensor with shape :math:`(N, C, H, W)`, where N is the batch size, C is the number 5719 of channels, H is the feature height, and W is the feature width. 5720 5721 Outputs: 5722 - **output** (Tensor) - With the same shape and data type as `x`. 5723 - **mask** (Tensor) - With the same shape as `x` and the data type is bool. 5724 5725 Raises: 5726 TypeError: If `x` is not a Tensor. 5727 TypeError: If the data type of `keep_prob` is not float. 5728 ValueError: If `keep_prob` is out of the range `[0.0, 1.0]`. 5729 ValueError: If `x` shape is not `4D`. 5730 5731 Supported Platforms: 5732 ``Ascend`` ``GPU`` ``CPU`` 5733 5734 Examples: 5735 >>> import mindspore 5736 >>> import numpy as np 5737 >>> from mindspore import Tensor, ops 5738 >>> dropout = ops.Dropout2D(keep_prob=0.5) 5739 >>> x = Tensor(np.ones([2, 1, 2, 3]), mindspore.float32) 5740 >>> output, mask = dropout(x) 5741 >>> print(output.shape) 5742 (2, 1, 2, 3) 5743 """ 5744 5745 @prim_attr_register 5746 def __init__(self, keep_prob=0.5): 5747 """Initialize Dropout2D.""" 5748 super().__init__("Dropout2D") 5749 self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name) 5750 self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name) 5751 5752 5753class Dropout3D(PrimitiveWithInfer): 5754 r""" 5755 During training, randomly zeroes some channels of the input tensor 5756 with probability :math:`1-keep\_prob` from a Bernoulli distribution(For a 5-dimensional 5757 tensor with a shape of NCDHW, 5758 the channel feature map refers to a 3-dimensional feature map with a shape of DHW). 5759 5760 Note: 5761 The keep probability :math:`keep\_prob` is equal to :math:`1 - p` in :func:`mindspore.ops.dropout3d`. 5762 5763 Dropout3D can improve the independence between channel feature maps. 5764 5765 Args: 5766 keep_prob (float): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8, 5767 means dropping out 20% of channels. Default: ``0.5`` . 5768 5769 Inputs: 5770 - **x** (Tensor) - A 5-D tensor with shape :math:`(N, C, D, H, W)`, where N is the batch size, C is the number 5771 of channels, D is the feature depth, H is the feature height, and W is the feature width. 5772 5773 Outputs: 5774 - **output** (Tensor) - With the same shape and data type as `x`. 5775 - **mask** (Tensor) - With the same shape as `x` and the data type is bool. 5776 5777 Raises: 5778 TypeError: If the data type of `keep_prob` is not float. 5779 ValueError: If `keep_prob` is out of the range [0.0, 1.0]; 5780 or if the dim of input is not 5-D. 5781 5782 Supported Platforms: 5783 ``Ascend`` ``GPU`` ``CPU`` 5784 5785 Examples: 5786 >>> import mindspore 5787 >>> import numpy as np 5788 >>> from mindspore import Tensor, ops 5789 >>> dropout = ops.Dropout3D(keep_prob=0.5) 5790 >>> x = Tensor(np.ones([2, 1, 2, 1, 2]), mindspore.float32) 5791 >>> output, mask = dropout(x) 5792 >>> print(output.shape) 5793 (2, 1, 2, 1, 2) 5794 """ 5795 5796 @prim_attr_register 5797 def __init__(self, keep_prob=0.5): 5798 """Initialize Dropout3D.""" 5799 super().__init__("Dropout3D") 5800 self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name) 5801 self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name) 5802 5803 5804class CTCLoss(Primitive): 5805 r""" 5806 Calculates the CTC (Connectionist Temporal Classification) loss and the gradient. 
5807 5808 The bottom layer of this interface calls the implementation of the third-party baidu-research::warp-ctc. 5809 The CTC algorithm is proposed in `Connectionist Temporal Classification: Labeling Unsegmented Sequence Data with 5810 Recurrent Neural Networks <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_. 5811 5812 CTCLoss calculates loss between a continuous time series and a target sequence. 5813 CTCLoss sums over the probability of input to target, producing a loss value which is differentiable with 5814 respect to each input node. The alignment of input to target is assumed to be “many-to-one”, 5815 such that the length of target series must be less than or equal to the length of input. 5816 5817 Args: 5818 preprocess_collapse_repeated (bool): If ``True`` , repeated labels will be collapsed prior to the CTC 5819 calculation. Default: ``False`` . 5820 ctc_merge_repeated (bool): If ``False`` , during CTC calculation, repeated non-blank labels will not be merged 5821 and these labels will be interpreted as individual ones. This is a simplified 5822 version of CTC. Default: ``True`` . 5823 ignore_longer_outputs_than_inputs (bool): If ``True`` , sequences with longer outputs than inputs will be 5824 ignored. Default: ``False`` . 5825 5826 Inputs: 5827 - **x** (Tensor) - The input Tensor must be a `3-D` tensor whose shape is 5828 :math:`(max\_time, batch\_size, num\_classes)`. `num_classes` must be `num_labels + 1` classes, `num_labels` 5829 indicates the number of actual labels. Blank labels are reserved. Default blank label is `num_classes - 1`. 5830 Data type must be float16, float32 or float64. 5831 - **labels_indices** (Tensor) - The indices of labels. `labels_indices[i, :] = [b, t]` means 5832 `labels_values[i]` stores the id for `(batch b, time t)`. The type must be int64 and rank must be 2. 5833 - **labels_values** (Tensor) - A `1-D` input tensor. The values are associated with the given batch and time. 5834 The type must be int32. `labels_values[i]` must be in the range of `[0, num_classes)`. 5835 - **sequence_length** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch\_size, )`. 5836 The type must be int32. Each value in the tensor must not be greater than `max_time`. 5837 5838 Outputs: 5839 - **loss** (Tensor) - A tensor containing log-probabilities, the shape is :math:`(batch\_size, )`. 5840 The tensor has the same data type as `x`. 5841 - **gradient** (Tensor) - The gradient of `loss`, has the same shape and data type as `x`. 5842 5843 Raises: 5844 TypeError: If `preprocess_collapse_repeated`, `ctc_merge_repeated` or `ignore_longer_outputs_than_inputs` 5845 is not a bool. 5846 TypeError: If `x`, `labels_indices`, `labels_values` or `sequence_length` is not a Tensor. 5847 ValueError: If rank of `labels_indices` is not equal to 2. 5848 TypeError: If dtype of `x` is not one of the following: float16, float32 nor float64. 5849 TypeError: If dtype of `labels_indices` is not int64. 5850 TypeError: If dtype of `labels_values` or `sequence_length` is not int32. 5851 5852 Supported Platforms: 5853 ``Ascend`` ``GPU`` ``CPU`` 5854 5855 Examples: 5856 >>> import mindspore 5857 >>> import numpy as np 5858 >>> from mindspore import Tensor, ops 5859 >>> x = Tensor(np.array([[[0.3, 0.6, 0.6], 5860 ... [0.4, 0.3, 0.9]], 5861 ... 5862 ... [[0.9, 0.4, 0.2], 5863 ... 
[0.9, 0.9, 0.1]]]).astype(np.float32)) 5864 >>> labels_indices = Tensor(np.array([[0, 0], [1, 0]]), mindspore.int64) 5865 >>> labels_values = Tensor(np.array([2, 2]), mindspore.int32) 5866 >>> sequence_length = Tensor(np.array([2, 2]), mindspore.int32) 5867 >>> ctc_loss = ops.CTCLoss() 5868 >>> loss, gradient = ctc_loss(x, labels_indices, labels_values, sequence_length) 5869 >>> print(loss) 5870 [ 0.79628 0.5995158 ] 5871 >>> print(gradient) 5872 [[[ 0.27029088 0.36485454 -0.6351454 ] 5873 [ 0.28140804 0.25462854 -0.5360366 ]] 5874 [[ 0.47548494 0.2883962 0.04510255 ] 5875 [ 0.4082751 0.4082751 0.02843709 ]]] 5876 """ 5877 5878 @prim_attr_register 5879 def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=True, 5880 ignore_longer_outputs_than_inputs=False): 5881 """Initialize CTCLoss.""" 5882 self.init_prim_io_names(inputs=["inputs", "labels_indices", "labels_values", "sequence_length"], 5883 outputs=["loss", "gradient"]) 5884 validator.check_value_type("preprocess_collapse_repeated", preprocess_collapse_repeated, [bool], self.name) 5885 self.preprocess_collapse_repeated_ = preprocess_collapse_repeated 5886 self.ctc_merge_repeated_ = validator.check_value_type("ctc_merge_repeated", ctc_merge_repeated, 5887 [bool], self.name) 5888 validator.check_value_type("ignore_longer_outputs_than_inputs", 5889 ignore_longer_outputs_than_inputs, [bool], self.name) 5890 self.ignore_longer_outputs_than_inputs_ = ignore_longer_outputs_than_inputs 5891 5892 5893class CTCGreedyDecoder(Primitive): 5894 r""" 5895 Performs greedy decoding on the logits given in inputs. 5896 5897 Refer to :func:`mindspore.ops.ctc_greedy_decoder` for more details. 5898 5899 Note: 5900 On Ascend, 'merge_repeated' can not be set to false. 5901 5902 Args: 5903 merge_repeated (bool, optional): If ``True`` , merge repeated classes in output. Default: ``True`` . 5904 5905 Inputs: 5906 - **inputs** (Tensor) - The input Tensor must be a 3-D tensor whose shape is 5907 :math:`(max\_time, batch\_size, num\_classes)`. `num_classes` must be `num_labels + 1` classes, 5908 `num_labels` indicates the number of actual labels. Blank labels are reserved. 5909 Default blank label is `num_classes - 1`. Data type must be float32 or float64. 5910 - **sequence_length** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch\_size, )`. 5911 The type must be int32. Each value in the tensor must be equal to or less than `max_time`. 5912 5913 Outputs: 5914 - **decoded_indices** (Tensor) - A tensor with shape of :math:`(total\_decoded\_outputs, 2)`. 5915 Data type is int64. 5916 - **decoded_values** (Tensor) - A tensor with shape of :math:`(total\_decoded\_outputs, )`, 5917 it stores the decoded classes. Data type is int64. 5918 - **decoded_shape** (Tensor) - A tensor with shape of :math:`(batch\_size, max\_decoded\_length)`. 5919 Data type is int64. 5920 - **log_probability** (Tensor) - A tensor with shape of :math:`(batch\_size, 1)`, 5921 containing sequence log-probability, has the same type as `inputs`. 5922 5923 Supported Platforms: 5924 ``Ascend`` ``GPU`` ``CPU`` 5925 5926 Examples: 5927 >>> import mindspore 5928 >>> import numpy as np 5929 >>> from mindspore import Tensor, ops 5930 >>> inputs = Tensor(np.array([[[0.6, 0.4, 0.2], [0.8, 0.6, 0.3]], 5931 ... [[0.0, 0.6, 0.0], [0.5, 0.4, 0.5]]]), mindspore.float32) 5932 >>> sequence_length = Tensor(np.array([2, 2]), mindspore.int32) 5933 >>> decoded_indices, decoded_values, decoded_shape, log_probability = ops.CTCGreedyDecoder()(inputs, 5934 ... 
sequence_length) 5935 >>> print(decoded_indices) 5936 [[0 0] 5937 [0 1] 5938 [1 0]] 5939 >>> print(decoded_values) 5940 [0 1 0] 5941 >>> print(decoded_shape) 5942 [2 2] 5943 >>> print(log_probability) 5944 [[-1.2] 5945 [-1.3]] 5946 """ 5947 5948 @prim_attr_register 5949 def __init__(self, merge_repeated=True): 5950 """Initialize CTCGreedyDecoder.""" 5951 self.merge_repeated = validator.check_value_type("merge_repeated", merge_repeated, [bool], self.name) 5952 5953 5954class BasicLSTMCell(PrimitiveWithInfer): 5955 """ 5956 It's similar to operator :class:`mindspore.ops.DynamicRNN`. BasicLSTMCell will be deprecated in the future. 5957 Please use DynamicRNN instead. 5958 5959 Supported Platforms: 5960 Deprecated 5961 """ 5962 5963 @prim_attr_register 5964 def __init__(self, keep_prob=1.0, forget_bias=1.0, state_is_tuple=True, activation='tanh'): 5965 """Initialize BasicLSTMCell.""" 5966 self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name) 5967 self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name) 5968 self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name) 5969 self.state_is_tuple = validator.check_value_type("state_is_tuple", state_is_tuple, [bool], self.name) 5970 self.activation = validator.check_string(activation, ['tanh'], "activation", self.name) 5971 5972 def infer_shape(self, x_shape, h_shape, c_shape, w_shape, b_shape): 5973 validator.check_int(len(x_shape), 2, validator.EQ, "x rank", self.name) 5974 validator.check_int(len(h_shape), 2, validator.EQ, "h rank", self.name) 5975 validator.check_int(len(c_shape), 2, validator.EQ, "c rank", self.name) 5976 validator.check_int(len(w_shape), 2, validator.EQ, "w rank", self.name) 5977 validator.check_int(len(b_shape), 1, validator.EQ, "b rank", self.name) 5978 validator.check("x_shape[0]", x_shape[0], "h_shape[0]", h_shape[0], validator.EQ, self.name) 5979 validator.check("c_shape[0]", c_shape[0], "h_shape[0]", h_shape[0], validator.EQ, self.name) 5980 validator.check("c_shape[1]", c_shape[1], "h_shape[1]", h_shape[1], validator.EQ, self.name) 5981 validator.check("w_shape[1]", w_shape[1], "4*h_shape[1]", 4 * h_shape[1], validator.EQ, self.name) 5982 validator.check("w_shape[0]", w_shape[0], "x_shape[1]+h_shape[1]", x_shape[1] + h_shape[1], 5983 validator.EQ, self.name) 5984 validator.check("b_shape[0]", b_shape[0], "4*h_shape[1]", 4 * h_shape[1], validator.EQ, self.name) 5985 ct_shape = c_shape 5986 ht_shape = c_shape 5987 it_shape = c_shape 5988 jt_shape = c_shape 5989 ft_shape = c_shape 5990 ot_shape = c_shape 5991 tanhct_shape = c_shape 5992 5993 return ct_shape, ht_shape, it_shape, jt_shape, ft_shape, ot_shape, tanhct_shape 5994 5995 def infer_dtype(self, x_dtype, h_dtype, c_dtype, w_dtype, b_dtype): 5996 tuple(map(partial(validator.check_tensor_dtype_valid, 5997 valid_dtypes=(mstype.float16, mstype.float32), prim_name=self.name), 5998 ("x_dtype", "h_dtype", "w_dtype"), 5999 (x_dtype, h_dtype, w_dtype))) 6000 args = {"c_dtype": c_dtype, "b_dtype": b_dtype} 6001 validator.check_tensors_dtypes_same_and_valid(args, [mstype.float16, mstype.float32], self.name) 6002 return c_dtype, mstype.float16, c_dtype, c_dtype, c_dtype, c_dtype, c_dtype 6003 6004 6005class DynamicRNN(Primitive): 6006 r""" 6007 Applies a recurrent neural network to the input. 6008 Only long short-term memory (LSTM) is supported currently. 6009 6010 .. 
math:: 6011 \begin{array}{ll} \\ 6012 i_{t+1} = \sigma(W_{ix} x_{t+1} + b_{ix} + W_{ih} h_{(t)} + b_{ih}) \\ 6013 f_{t+1} = \sigma(W_{fx} x_{t+1} + b_{fx} + W_{fh} h_{(t)} + b_{fh}) \\ 6014 \tilde{c}_{t+1} = \tanh(W_{cx} x_{t+1} + b_{cx} + W_{ch} h_{(t)} + b_{ch}) \\ 6015 o_{t+1} = \sigma(W_{ox} x_{t+1} + b_{ox} + W_{oh} h_{(t)} + b_{oh}) \\ 6016 c_{t+1} = f_{t+1} * c_{(t)} + i_t * \tilde{c}_{t+1} \\ 6017 h_{t+1} = o_{t+1} * \tanh(c_{t+1}) \\ 6018 \end{array} 6019 6020 :math:`h_{t+1}` is the hidden state at time `t+1`. :math:`x_{t+1}` is the input 6021 at time `t+1`. :math:`h_{t}` is the hidden state of the layer 6022 at time `t` or the initial hidden state at time `0`. 6023 :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. :math:`W, b` 6024 are learnable weights between the output and the input in the formula. For instance, 6025 :math:`W_{ix}, b_{ix}` are the weight and bias used to transform from input :math:`x` to :math:`i`. 6026 6027 Args: 6028 cell_type (str, optional): A string identifying the cell type in the operator. Default: ``'LSTM'`` . 6029 Only 'LSTM' is currently supported. 6030 direction (str, optional): A string identifying the direction in the operator. Default: ``'UNIDIRECTIONAL'`` . 6031 Only 'UNIDIRECTIONAL' is currently supported. 6032 cell_depth (int, optional): An integer identifying the cell depth in the operator. Default: ``1`` . 6033 use_peephole (bool, optional): A bool identifying if use peephole in the operator. Default: ``False`` . 6034 keep_prob (float, optional): A float identifying the keep prob in the operator. Default: ``1.0`` . 6035 cell_clip (float, optional): A float identifying the cell clip in the operator. Default: ``-1.0`` . 6036 num_proj (int, optional): An integer identifying the number projection in the operator. Default: ``0`` . 6037 time_major (bool, optional): A bool specify the data format of `x`. If it is set to ``True`` , the format is 6038 :math:`(num\_step, batch\_size, input\_size)`, if it is set to False, the format is 6039 :math:`(batch\_size, num\_step, input\_size)`. 6040 Default: ``True`` . Only supports ``True`` at present. 6041 activation (str, optional): A string identifying the type of activation function in the operator. 6042 Default: ``'tanh'`` . Only 'tanh' is currently supported. 6043 forget_bias (float, optional): A float identifying the forget bias in the operator. Default: ``0.0`` . 6044 is_training (bool, optional): A bool identifying is training in the operator. Default: ``True`` . 6045 6046 Inputs: 6047 - **x** (Tensor) - Current words. Tensor of shape :math:`(num\_step, batch\_size, input\_size)`. 6048 The data type must be float16. 6049 - **w** (Tensor) - Weight. Tensor of shape :math:`(input\_size + hidden\_size, 4 * hidden\_size)`. 6050 The data type must be float16. 6051 - **b** (Tensor) - Bias. Tensor of shape :math:`(4 * hidden\_size)`. 6052 The data type must be float16. 6053 - **seq_length** (Tensor) - The length of each batch. Tensor of shape :math:`(batch\_size, )`. 6054 Only `None` is currently supported. 6055 - **init_h** (Tensor) - Hidden state of initial time. Tensor of shape :math:`(1, batch\_size, hidden\_size)`. 6056 The data type must be float16. 6057 - **init_c** (Tensor) - Cell state of initial time. Tensor of shape :math:`(1, batch\_size, hidden\_size)`. 6058 The data type must be float16. 6059 6060 Outputs: 6061 - **y** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`. 6062 Has the same type with input `b`. 
6063 - **output_h** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`. 6064 With data type of float16. 6065 - **output_c** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`. 6066 Has the same type with input `b`. 6067 - **i** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`. 6068 Has the same type with input `b`. 6069 - **j** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`. 6070 Has the same type with input `b`. 6071 - **f** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`. 6072 Has the same type with input `b`. 6073 - **o** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`. 6074 Has the same type with input `b`. 6075 - **tanhct** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`. 6076 Has the same type with input `b`. 6077 6078 Raises: 6079 TypeError: If `cell_type`, `direction` or `activation` is not a str. 6080 TypeError: If `cell_depth` or `num_proj` is not an int. 6081 TypeError: If `keep_prob`, `cell_clip` or `forget_bias` is not a float. 6082 TypeError: If `use_peehpole`, `time_major` or `is_training` is not a bool. 6083 TypeError: If `x`, `w`, `b`, `seq_length`, `init_h` or `init_c` is not a Tensor. 6084 TypeError: If dtype of `x`, `w`, `init_h` or `init_c` is not float16. 6085 TypeError: If dtype of `b` is neither float16 nor float32. 6086 6087 Supported Platforms: 6088 ``Ascend`` 6089 6090 Examples: 6091 >>> import numpy as np 6092 >>> from mindspore import Tensor, ops 6093 >>> x = Tensor(np.random.rand(2, 16, 64).astype(np.float16)) 6094 >>> w = Tensor(np.random.rand(96, 128).astype(np.float16)) 6095 >>> b = Tensor(np.random.rand(128).astype(np.float16)) 6096 >>> init_h = Tensor(np.random.rand(1, 16, 32).astype(np.float16)) 6097 >>> init_c = Tensor(np.random.rand(1, 16, 32).astype(np.float16)) 6098 >>> dynamic_rnn = ops.DynamicRNN() 6099 >>> output = dynamic_rnn(x, w, b, None, init_h, init_c) 6100 >>> print(output[0].shape) 6101 (2, 16, 32) 6102 """ 6103 6104 @prim_attr_register 6105 def __init__(self, 6106 cell_type='LSTM', 6107 direction='UNIDIRECTIONAL', 6108 cell_depth=1, 6109 use_peephole=False, 6110 keep_prob=1.0, 6111 cell_clip=-1.0, 6112 num_proj=0, 6113 time_major=True, 6114 activation='tanh', 6115 forget_bias=0.0, 6116 is_training=True): 6117 """Initialize DynamicRNN.""" 6118 self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name) 6119 self.cell_depth = validator.check_value_type("cell_depth", cell_depth, [int], self.name) 6120 self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name) 6121 validator.check_number_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, float, "keep_prob") 6122 self.cell_clip = validator.check_value_type("cell_clip", cell_clip, [float], self.name) 6123 self.num_proj = validator.check_non_negative_int(num_proj, "num_proj", self.name) 6124 self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name) 6125 self.use_peephole = validator.check_value_type("use_peephole", use_peephole, [bool], self.name) 6126 self.time_major = validator.check_value_type("time_major", time_major, [bool], self.name) 6127 validator.check("time_major", time_major, "the supported value", True, validator.EQ, self.name) 6128 self.is_training = validator.check_value_type("is_training", is_training, [bool], self.name) 6129 validator.check_value_type("cell_type", cell_type, [str], self.name) 6130 
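# Only the LSTM cell, unidirectional execution and the tanh activation are currently
# supported, so the string attributes below are validated against a single allowed value.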
self.cell_type = validator.check_string(cell_type, ['LSTM'], "cell_type", self.name) 6131 validator.check_value_type("direction", direction, [str], self.name) 6132 self.direction = validator.check_string(direction, ['UNIDIRECTIONAL'], "direction", self.name) 6133 validator.check_value_type("activation", activation, [str], self.name) 6134 self.activation = validator.check_string(activation, ['tanh'], "activation", self.name) 6135 6136 6137class DynamicGRUV2(Primitive): 6138 r""" 6139 Applies a single-layer gated recurrent unit (GRU) to an input sequence. 6140 6141 .. math:: 6142 6143 \begin{array}{ll} 6144 r_{t+1} = \sigma(W_{ir} x_{t+1} + b_{ir} + W_{hr} h_{(t)} + b_{hr}) \\ 6145 z_{t+1} = \sigma(W_{iz} x_{t+1} + b_{iz} + W_{hz} h_{(t)} + b_{hz}) \\ 6146 n_{t+1} = \tanh(W_{in} x_{t+1} + b_{in} + r_{t+1} * (W_{hn} h_{(t)}+ b_{hn})) \\ 6147 h_{t+1} = (1 - z_{t+1}) * n_{t+1} + z_{t+1} * h_{(t)} 6148 \end{array} 6149 6150 where :math:`h_{t+1}` is the hidden state at time `t+1`, :math:`x_{t+1}` is the input 6151 at time `t+1`, :math:`h_{t}` is the hidden state of the layer 6152 at time `t` or the initial hidden state at time `0`. :math:`r_{t+1}`, 6153 :math:`z_{t+1}`, :math:`n_{t+1}` are the reset, update, and new gates, respectively. 6154 :math:`W`, :math:`b` are the weight parameter and the deviation parameter respectively. 6155 :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. 6156 6157 Args: 6158 direction (str, optional): A string identifying the direction in the operator. Default: ``'UNIDIRECTIONAL'`` . 6159 Only ``'UNIDIRECTIONAL'`` is currently supported. 6160 cell_depth (int, optional): An integer identifying the cell depth in the operator. Default: ``1`` . 6161 keep_prob (float, optional): A float identifying the keep prob in the operator. Default: ``1.0`` . 6162 cell_clip (float, optional): A float identifying the cell clip in the operator. Default: ``-1.0`` . 6163 num_proj (int, optional): An integer identifying the number projection in the operator. Default: ``0`` . 6164 time_major (bool, optional): A bool identifying the time major in the operator. Default: ``True`` . 6165 activation (str, optional) : A string identifying the type of activation function in the operator. 6166 Default: ``'tanh'`` . Only ``'tanh'`` is currently supported. 6167 gate_order (str, optional): A string identifying the gate order in weight and bias. Default: ``'rzh'`` . 6168 ``'zrh'`` is another option. Here, ``'rzh'`` means the gate order is: reset gate, update gate, hidden gate. 6169 ``'zrh'`` means the gate order is: update gate, reset gate, hidden gate. 6170 reset_after (bool, optional): A bool identifying whether to apply reset gate after matrix multiplication. 6171 Default: ``True`` . 6172 is_training (bool, optional): A bool identifying is training in the operator. Default: ``True`` . 6173 6174 Inputs: 6175 - **x** (Tensor) - Current words. 6176 Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{input_size})`. 6177 The data type must be float16. 6178 - **weight_input** (Tensor) - Input-hidden weight :math:`W_{\{ir,iz,in\}}`. 6179 Tensor of shape :math:`(\text{input_size}, 3 \times \text{hidden_size})`. 6180 The data type must be float16. 6181 - **weight_hidden** (Tensor) - Hidden-hidden weight :math:`W_{\{hr,hz,hn\}}`. 6182 Tensor of shape :math:`(\text{hidden_size}, 3 \times \text{hidden_size})`. 6183 The data type must be float16. 6184 - **bias_input** (Tensor) - Input-hidden bias :math:`b_{\{ir,iz,in\}}`. 
          Tensor of shape :math:`(3 \times \text{hidden_size})`, or None.
          Has the same data type as input `init_h`.
        - **bias_hidden** (Tensor) - Hidden-hidden bias :math:`b_{\{hr,hz,hn\}}`.
          Tensor of shape :math:`(3 \times \text{hidden_size})`, or None.
          Has the same data type as input `init_h`.
        - **seq_length** (Tensor) - The length of each batch. Tensor of shape :math:`(\text{batch_size})`.
          Only `None` is currently supported.
        - **init_h** (Tensor) - Hidden state of initial time.
          Tensor of shape :math:`(\text{batch_size}, \text{hidden_size})`.
          The data type must be float16 or float32.

    Outputs:
        - **y** (Tensor) - A Tensor of shape:

          - :math:`(num\_step, batch\_size, min(hidden\_size, num\_proj))` if `num_proj > 0`,
          - :math:`(num\_step, batch\_size, hidden\_size)` if `num_proj = 0`.

          Has the same data type as `bias_type`.
        - **output_h** (Tensor) - A Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{hidden_size})`.
          Has the same data type as `bias_type`.
        - **update** (Tensor) - A Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{hidden_size})`.
          Has the same data type as `bias_type`.
        - **reset** (Tensor) - A Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{hidden_size})`.
          Has the same data type as `bias_type`.
        - **new** (Tensor) - A Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{hidden_size})`.
          Has the same data type as `bias_type`.
        - **hidden_new** (Tensor) - A Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{hidden_size})`.
          Has the same data type as `bias_type`.

        A note about `bias_type`:

        - If `bias_input` and `bias_hidden` both are `None`, `bias_type` is the data type of `init_h`.
        - If `bias_input` is not `None`, `bias_type` is the data type of `bias_input`.
        - If `bias_input` is `None` and `bias_hidden` is not `None`, `bias_type` is the data type of `bias_hidden`.

    Raises:
        TypeError: If `direction`, `activation` or `gate_order` is not a str.
        TypeError: If `cell_depth` or `num_proj` is not an int.
        TypeError: If `keep_prob` or `cell_clip` is not a float.
        TypeError: If `time_major`, `reset_after` or `is_training` is not a bool.
        TypeError: If `x`, `weight_input`, `weight_hidden`, `bias_input`, `bias_hidden`, `seq_length` or `init_h`
            is not a Tensor.
        TypeError: If dtype of `x`, `weight_input` or `weight_hidden` is not float16.
        TypeError: If dtype of `init_h` is neither float16 nor float32.
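    The recurrence above can be written as a small, framework-free sketch: a simplified single step in NumPy,
    assuming ``gate_order='rzh'``, ``num_proj=0`` and the bias layout described in Inputs. The helper name is
    illustrative only; this is not the operator implementation::

        import numpy as np

        def gru_step(x, h, weight_input, weight_hidden, bias_input, bias_hidden):
            # One step of the GRU equations above for a (batch_size, input_size) slice x.
            sigmoid = lambda t: 1.0 / (1.0 + np.exp(-t))
            gi = x @ weight_input + bias_input        # (batch_size, 3 * hidden_size)
            gh = h @ weight_hidden + bias_hidden      # (batch_size, 3 * hidden_size)
            i_r, i_z, i_n = np.split(gi, 3, axis=-1)  # gate order 'rzh': reset, update, hidden
            h_r, h_z, h_n = np.split(gh, 3, axis=-1)
            r = sigmoid(i_r + h_r)                    # reset gate
            z = sigmoid(i_z + h_z)                    # update gate
            n = np.tanh(i_n + r * h_n)                # new gate
            return (1.0 - z) * n + z * h              # next hidden state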
6229 6230 Supported Platforms: 6231 ``Ascend`` 6232 6233 Examples: 6234 >>> import numpy as np 6235 >>> from mindspore import Tensor, ops 6236 >>> x = Tensor(np.random.rand(2, 8, 64).astype(np.float16)) 6237 >>> weight_i = Tensor(np.random.rand(64, 48).astype(np.float16)) 6238 >>> weight_h = Tensor(np.random.rand(16, 48).astype(np.float16)) 6239 >>> bias_i = Tensor(np.random.rand(48).astype(np.float16)) 6240 >>> bias_h = Tensor(np.random.rand(48).astype(np.float16)) 6241 >>> init_h = Tensor(np.random.rand(8, 16).astype(np.float16)) 6242 >>> dynamic_gru_v2 = ops.DynamicGRUV2() 6243 >>> output = dynamic_gru_v2(x, weight_i, weight_h, bias_i, bias_h, None, init_h) 6244 >>> print(output[0].shape) 6245 (2, 8, 16) 6246 """ 6247 6248 @prim_attr_register 6249 def __init__(self, 6250 direction='UNIDIRECTIONAL', 6251 cell_depth=1, 6252 keep_prob=1.0, 6253 cell_clip=-1.0, 6254 num_proj=0, 6255 time_major=True, 6256 activation="tanh", 6257 gate_order="rzh", 6258 reset_after=True, 6259 is_training=True): 6260 """Initialize DynamicGRUV2.""" 6261 self.cell_depth = validator.check_value_type("cell_depth", cell_depth, [int], self.name) 6262 self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name) 6263 self.cell_clip = validator.check_value_type("cell_clip", cell_clip, [float], self.name) 6264 self.num_proj = validator.check_non_negative_int(num_proj, "num_proj", self.name) 6265 self.time_major = validator.check_value_type("time_major", time_major, [bool], self.name) 6266 self.is_training = validator.check_value_type("is_training", is_training, [bool], self.name) 6267 self.direction = validator.check_string(direction, ['UNIDIRECTIONAL'], "direction", self.name) 6268 self.activation = validator.check_string(activation, ['tanh'], "activation", self.name) 6269 self.gate_order = validator.check_string(gate_order, ['zrh', 'rzh'], "gate_order", self.name) 6270 self.reset_after = validator.check_value_type("reset_after", reset_after, [bool], self.name) 6271 self.init_prim_io_names( 6272 inputs=[ 6273 "x", "weight_input", "weight_hidden", "bias_input", 6274 "bias_hidden", "seq_length", "init_h" 6275 ], 6276 outputs=["y", "output_h", "update", "reset", "new", "hidden_new"]) 6277 6278 6279class InTopK(Primitive): 6280 r""" 6281 Determines whether the targets are in the top `k` predictions. 6282 6283 Refer to :func:`mindspore.ops.intopk` for more details. 6284 6285 Args: 6286 k (int): Specifies the number of top elements to be used for computing precision along the last dimension. 6287 6288 Inputs: 6289 - **x1** (Tensor) - A 2D Tensor defines the predictions of a batch of samples with float16 or float32 6290 data type. 6291 - **x2** (Tensor) - A 1D Tensor defines the labels of a batch of samples with int32 data type. The size of `x2` 6292 must be equal to the first dimension of `x1`. The values of `x2` can not be negative and 6293 must be equal to or less than index of x1's second dimension. 6294 6295 Outputs: 6296 Tensor has 1 dimension of type bool and the same shape with `x2`. For labeling sample `i` in `x2`, 6297 if the label in the first `k` predictions for sample `i` is in `x1`, then the value is ``True`` , 6298 otherwise ``False`` . 
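    In other words, for each row `i` the result is ``True`` exactly when ``x2[i]`` indexes one of the `k`
    largest entries of ``x1[i]``. A rough NumPy sketch of that check (ties may be resolved differently by
    the operator, and the helper name is illustrative only)::

        import numpy as np

        def in_top_k_reference(x1, x2, k):
            topk_indices = np.argsort(-x1, axis=1)[:, :k]   # indices of the k largest entries per row
            return np.array([x2[i] in topk_indices[i] for i in range(x2.shape[0])])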
6299 6300 Supported Platforms: 6301 ``Ascend`` ``GPU`` ``CPU`` 6302 6303 Examples: 6304 >>> import mindspore 6305 >>> import numpy as np 6306 >>> from mindspore import Tensor, ops 6307 >>> x1 = Tensor(np.array([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]]), mindspore.float32) 6308 >>> x2 = Tensor(np.array([1, 3]), mindspore.int32) 6309 >>> in_top_k = ops.InTopK(3) 6310 >>> output = in_top_k(x1, x2) 6311 >>> print(output) 6312 [ True False] 6313 """ 6314 6315 @prim_attr_register 6316 def __init__(self, k): 6317 """Initialize InTopK""" 6318 self.init_prim_io_names(inputs=['x1', 'x2', 'k'], outputs=['y']) 6319 validator.check_value_type("k", k, [int], self.name) 6320 6321 6322class LRN(Primitive): 6323 r""" 6324 Local Response Normalization. 6325 6326 .. warning:: 6327 LRN is deprecated on Ascend due to potential accuracy problem. It's recommended to use other 6328 normalization methods, e.g. :class:`mindspore.ops.BatchNorm`. 6329 6330 .. math:: 6331 6332 b_{c} = a_{c}\left(k + \frac{\alpha}{n} 6333 \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta} 6334 6335 where the :math:`a_{c}` indicates the specific value of the pixel corresponding to :math:`c` in feature map; 6336 where the :math:`n/2` indicates the `depth_radius`; where the :math:`k` indicates the `bias`; 6337 where the :math:`\alpha` indicates the `alpha`; where the :math:`\beta` indicates the `beta`. 6338 6339 Args: 6340 depth_radius (int): Half-width of the 1-D normalization window with the shape of 0-D. Default: ``5`` . 6341 bias (float): An offset (usually positive to avoid dividing by 0). Default: ``1.0`` . 6342 alpha (float): A scale factor, usually positive. Default: ``1.0`` . 6343 beta (float): An exponent. Default: ``0.5`` . 6344 norm_region (str): Specifies normalization region. Options: ``"ACROSS_CHANNELS"`` . 6345 Default: ``"ACROSS_CHANNELS"`` . 6346 6347 Inputs: 6348 - **x** (Tensor) - A 4-D Tensor with float16 or float32 data type. 6349 6350 Outputs: 6351 Tensor, with the same shape and data type as `x`. 6352 6353 Raises: 6354 TypeError: If `depth_radius` is not an int. 6355 TypeError: If `bias`, `alpha` or `beta` is not a float. 6356 TypeError: If `norm_region` is not a str. 6357 TypeError: If `x` is not a Tensor. 6358 6359 Supported Platforms: 6360 ``GPU`` ``CPU`` 6361 6362 Examples: 6363 >>> import mindspore 6364 >>> import numpy as np 6365 >>> from mindspore import Tensor, ops 6366 >>> x = Tensor(np.array([[[[0.1], [0.2]], 6367 ... [[0.3], [0.4]]]]), mindspore.float32) 6368 >>> lrn = ops.LRN() 6369 >>> output = lrn(x) 6370 >>> print(output) 6371 [[[[0.09534626] 6372 [0.1825742 ]] 6373 [[0.2860388 ] 6374 [0.3651484 ]]]] 6375 """ 6376 6377 @prim_attr_register 6378 def __init__(self, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CHANNELS"): 6379 """Initialize LRN""" 6380 super().__init__("LRN") 6381 self.init_prim_io_names(inputs=['x'], outputs=['y']) 6382 validator.check_value_type("depth_radius", depth_radius, [int], self.name) 6383 validator.check_value_type("bias", bias, [float], self.name) 6384 validator.check_value_type("alpha", alpha, [float], self.name) 6385 validator.check_value_type("beta", beta, [float], self.name) 6386 validator.check_value_type("norm_region", norm_region, [str], self.name) 6387 validator.check_string(norm_region, ['ACROSS_CHANNELS'], 'norm_region', self.name) 6388 validator.check_non_negative_int(depth_radius, "depth_radius", self.name) 6389 6390 6391class AvgPool3D(Primitive): 6392 r""" 6393 3D Average pooling operation. 

    Typically the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`, and AvgPool3D outputs
    regional averages in the :math:`(D_{in}, H_{in}, W_{in})`-dimension. Given kernel size
    :math:`ks = (d_{ker}, h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1, s_2)`, the operation is as follows.

    .. warning::
        "kernel_size" is in the range [1, 255]. "strides" is in the range [1, 63].

    .. math::
        \text{output}(N_i, C_j, d, h, w) =
        \frac{1}{d_{ker} * h_{ker} * w_{ker}} \sum_{l=0}^{d_{ker}-1} \sum_{m=0}^{h_{ker}-1} \sum_{n=0}^{w_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)

    Args:
        kernel_size (Union[int, tuple[int]]): The size of the kernel used to take the average value. It is an
            int number that represents depth, height and width of the kernel at the same time, or a tuple
            of three int numbers that represent depth, height and width respectively. Default: ``1`` .
        strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the depth, height and width of movement at the same time, or a tuple of three int numbers that
            represent depth, height and width of movement respectively. Default: ``1`` .
        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .

            - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
              are the same when `stride` is set to ``1``.
              The amount of padding is calculated by the operator internally. If the amount is even,
              it is uniformly distributed around the input; if it is odd, the excess amount goes
              to the front/right/bottom side.
              If this mode is set, `pad` must be 0.
            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
              possible depth, height and width. Extra pixels that could not complete a full stride will
              be discarded. If this mode is set, `pad` must be 0.
            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
              in the depth, height and width dimension is determined by the `pad` parameter.
              If this mode is set, `pad` must be greater than or equal to 0.

        pad (Union(int, tuple[int], list[int])): The pad value to be filled. Default: ``0`` . If `pad` is an integer,
            the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
            If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
            pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
        ceil_mode (bool): If ``True`` , use ceil instead of floor to compute the output shape. Default: ``False`` .
        count_include_pad (bool): If ``True`` , averaging calculation will include the zero-padding.
            Default: ``True`` .
        divisor_override (int): If specified, it will be used as divisor in the averaging calculation,
            otherwise kernel_size will be used. Default: ``0`` .
        data_format (str): The optional value for data format. Currently only support ``'NCDHW'`` .
            Default: ``'NCDHW'`` .

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`.
          Currently support float16, float32 and float64 data type.

    Outputs:
        Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the same data type as `x`.
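    For the default ``pad_mode="valid"`` with ``ceil_mode=False``, each output spatial size follows the usual
    pooling arithmetic. A back-of-the-envelope sketch (the helper name is illustrative only)::

        import math

        def valid_pool_out_size(in_size, kernel, stride):
            # output length of one spatial dimension for pad_mode="valid", ceil_mode=False
            return math.floor((in_size - kernel) / stride) + 1

        # For the Example below: (D, H, W) = (2, 2, 3) with kernel_size=2 and strides=1
        # gives (1, 1, 2), matching the printed output of shape (1, 2, 1, 1, 2).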

    Raises:
        TypeError: If `kernel_size`, `strides` or `pad` is neither an int nor a tuple.
        TypeError: If `ceil_mode` or `count_include_pad` is not a bool.
        TypeError: If `pad_mode` or `data_format` is not a string.
        TypeError: If `divisor_override` is not an int.
        ValueError: If numbers in `kernel_size` or `strides` are not positive.
        ValueError: If `kernel_size` or `strides` is a tuple whose length is not equal to 3.
        ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
        ValueError: If `pad` is a tuple whose length is not equal to 6.
        ValueError: If element of `pad` is less than 0.
        ValueError: If `pad_mode` is not equal to 'pad' and `pad` is not equal to 0 or (0, 0, 0, 0, 0, 0).
        ValueError: If `data_format` is not 'NCDHW'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> from mindspore import Tensor, ops
        >>> import numpy as np
        >>> x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float16)
        >>> avg_pool3d = ops.AvgPool3D(kernel_size=2, strides=1, pad_mode="valid")
        >>> output = avg_pool3d(x)
        >>> print(output)
        [[[[[ 5. 6.]]]
          [[[17. 18.]]]]]
    """

    @prim_attr_register
    def __init__(self, kernel_size=1, strides=1, pad_mode="valid", pad=0, ceil_mode=False,
                 count_include_pad=True, divisor_override=0, data_format="NCDHW"):
        """Initialize AvgPool3D"""
        self.init_prim_io_names(inputs=['input'], outputs=['output'])
        self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name, ret_five=True)
        self.add_prim_attr('kernel_size', self.kernel_size)
        self.strides = _check_3d_int_or_tuple('strides', strides, self.name, ret_five=True)
        self.add_prim_attr('strides', self.strides)
        validator.check_value_type('pad', pad, (int, tuple, list), self.name)
        if isinstance(pad, int):
            pad = (pad,) * 6
        if len(pad) != 6:
            raise ValueError(f"For '{self.name}', attr 'pad' must be a positive int number or a tuple of "
                             f"six positive int numbers, but got {pad}.")
        self.pad_list = pad
        self.add_prim_attr('pad_list', self.pad_list)
        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME', 'PAD'], 'pad_mode', self.name)
        self.add_prim_attr('pad_mode', self.pad_mode)

        if self.pad_mode != 'PAD' and pad != (0, 0, 0, 0, 0, 0):
            raise ValueError(f"For '{self.name}', the 'pad' must be zero or (0, 0, 0, 0, 0, 0) when 'pad_mode' "
                             f"is not \"PAD\", but got 'pad' is {pad} and 'pad_mode' is {pad_mode}.")
        if self.pad_mode == 'PAD':
            for item in pad:
                validator.check_non_negative_int(item, 'pad or item of pad', self.name)
        self.ceil_mode = validator.check_value_type('ceil_mode', ceil_mode, bool, self.name)
        self.count_include_pad = validator.check_value_type('count_include_pad', count_include_pad, bool, self.name)
        self.divisor_override = validator.check_non_negative_int(divisor_override, 'divisor_override', self.name)
        self.format = validator.check_string(data_format, ['NCDHW'], 'format', self.name)


class Conv3D(Primitive):
    r"""
    3D convolution layer.
6513 6514 Applies a 3D convolution over an input tensor which is typically of shape 6515 :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, 6516 where :math:`N` is batch size, :math:`C` is channel number, 6517 :math:`D, H, W` 6518 are the depth, height and width of the feature map, respectively. 6519 6520 The output is calculated based on formula: 6521 6522 .. math:: 6523 6524 \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + 6525 \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)}) 6526 6527 where :math:`bias` is the output channel bias, :math:`ccor` is 6528 the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, 6529 :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map. 6530 6531 Here are the indices' meanings: 6532 6533 - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`, 6534 where :math:`N` is the batch size of the input. 6535 6536 - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`, 6537 where :math:`C_{out}` is the number of 6538 output channels, which is also equal to the number of kernels. 6539 6540 - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`, 6541 where :math:`C_{in}` is the number of 6542 input channels, which is also equal to the number of channels in the convolutional kernels. 6543 6544 Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th 6545 output channel, :math:`{weight}(C_{\text{out}_j}, k)`represents the slice of the :math:`j`-th convolutional 6546 kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input 6547 channel in the :math:`i`-th batch of the input feature map. 6548 6549 The shape of the convolutional kernel is given by 6550 :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})` 6551 where :math:`\text{kernel_size[0]}` , 6552 :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are the depth, 6553 height and width of the kernel, respectively. 6554 If we consider the input and output channels as well as the `group` parameter, the complete kernel shape 6555 will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, 6556 \text{kernel_size[1]}, \text{kernel_size[2]})`, 6557 where `group` is the number of groups dividing `x`'s input channel when applying group convolution. 6558 6559 For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition 6560 <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. 6561 6562 Note: 6563 1. On Ascend platform, :math:`groups=1` must be satisfied. 6564 2. On Ascend :math:`dilation` on depth only supports the case of 1. 6565 6566 Args: 6567 out_channel (int): Specifies output channel :math:`C_{out}`. 6568 kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. 6569 It can be a single int or a tuple of 3 integers. A single int means the value is for depth, height 6570 and the width. A tuple of 3 ints means the first value is for depth and 6571 the rest is for the height and width. 6572 mode (int, optional): Modes for different convolutions. It is currently not used. Default: ``1`` . 
        stride (Union[int, tuple[int]], optional): The distance of kernel moving, it can be an int number
            that represents the depth, height and width of movement or a tuple of three int numbers that
            represent depth, height and width movement respectively. Default: ``1`` .
        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .

            - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
              are the same when `stride` is set to ``1``.
              The amount of padding is calculated by the operator internally. If the amount is even,
              it is uniformly distributed around the input; if it is odd, the excess amount goes
              to the front/right/bottom side.
              If this mode is set, `pad` must be 0.
            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
              possible depth, height and width. Extra pixels that could not complete a full stride will
              be discarded. If this mode is set, `pad` must be 0.
            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
              in the depth, height and width dimension is determined by the `pad` parameter.
              If this mode is set, `pad` must be greater than or equal to 0.

        pad (Union(int, tuple[int]), optional): Specifies the amount of padding to apply on input
            when `pad_mode` is set to ``"pad"``. It can be a single int or a tuple of 6 ints.
            If `pad` is one integer, the paddings of head, tail, top, bottom,
            left and right are the same, equal to `pad`. If `pad` is a tuple with 6 integers, the
            paddings of head, tail, top, bottom, left and right are equal to pad[0],
            pad[1], pad[2], pad[3], pad[4] and pad[5] accordingly. Default: ``0`` .
        dilation (Union[int, tuple[int]], optional): Specifies the dilation rate to use for dilated convolution.
            It can be a single int or a tuple of 3 integers. A single int means the dilation size is the same
            in the depth, height and width directions. A tuple of 3 ints represents the dilation size in
            the depth, height and width directions, respectively.
            Assuming :math:`dilation=(d0, d1, d2)`, the convolutional kernel samples the input with a
            spacing of :math:`d0-1` elements in the depth direction,
            :math:`d1-1` elements in the height direction, and :math:`d2-1` elements in the
            width direction. The values in the depth, height and width dimensions are in the
            ranges [1, D], [1, H] and [1, W], respectively.
            Default: ``1`` .
        group (int, optional): The number of groups into which the filter is divided. `in_channels`
            and `out_channels` must be divisible by `group`. Default: ``1`` .
        data_format (str, optional): The optional value for data format. Currently only ``"NCDHW"`` is supported.
            Default: ``"NCDHW"`` .

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
          Currently input data type only supports float16 and float32.
        - **weight** (Tensor) - If the size of the kernel is :math:`(k_d, K_h, K_w)`, then the shape is
          :math:`(C_{out}, C_{in}/groups, k_d, K_h, K_w)`.
          Currently weight data type only supports float16 and float32.
        - **bias** (Tensor) - Tensor of shape :math:`(C_{out})`. When bias is None, zeros will be used.
          Default: ``None`` .

    Outputs:
        Tensor, the output of the 3D convolution.
        The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.

        `pad_mode` is ``"same"``:

        .. math::
            \begin{array}{ll} \\
                D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
                H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
                W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
            \end{array}

        `pad_mode` is ``"valid"``:

        .. math::
            \begin{array}{ll} \\
                D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) - 1}
                {\text{stride[0]}} + 1} \right \rfloor \\
                H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) - 1}
                {\text{stride[1]}} + 1} \right \rfloor \\
                W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) - 1}
                {\text{stride[2]}} + 1} \right \rfloor \\
            \end{array}

        `pad_mode` is ``"pad"``:

        .. math::
            \begin{array}{ll} \\
                D_{out} = \left \lfloor{\frac{D_{in} + pad[0] + pad[1] - \text{dilation[0]} \times
                (\text{kernel_size[0]} - 1) - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
                H_{out} = \left \lfloor{\frac{H_{in} + pad[2] + pad[3] - \text{dilation[1]} \times
                (\text{kernel_size[1]} - 1) - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
                W_{out} = \left \lfloor{\frac{W_{in} + pad[4] + pad[5] - \text{dilation[2]} \times
                (\text{kernel_size[2]} - 1) - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
            \end{array}

    Raises:
        TypeError: If `out_channel` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
        ValueError: If `out_channel`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `pad` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
        ValueError: If `pad` is a tuple whose length is not equal to 6.
        ValueError: If `pad_mode` is not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0, 0, 0).
        ValueError: If `data_format` is not 'NCDHW'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> # case 1: specify kernel_size with tuple, all parameters use default values.
        >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
        >>> weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
        >>> conv3d = ops.Conv3D(out_channel=32, kernel_size=(4, 3, 3))
        >>> output = conv3d(x, weight)
        >>> print(output.shape)
        (16, 32, 7, 30, 30)
        >>> # case 2: specify kernel_size with int, all parameters use default values.
        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3)
        >>> output = conv3d(x, weight)
        >>> print(output.shape)
        (10, 40, 30, 30, 30)
        >>> # case 3: stride=(1, 2, 3), other parameters being default.
        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, stride=(1, 2, 3))
        >>> output = conv3d(x, weight)
        >>> print(output.shape)
        (10, 40, 30, 15, 10)
        >>> # case 4: pad_mode="pad", other parameters being default.
6696 >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32) 6697 >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32) 6698 >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, pad_mode="pad", pad=2) 6699 >>> output = conv3d(x, weight) 6700 >>> print(output.shape) 6701 (10, 40, 34, 34, 34) 6702 >>> # case 5: dilation=(1, 1, 1), other parameters being default. 6703 >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32) 6704 >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32) 6705 >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, dilation=(1, 1, 1)) 6706 >>> output = conv3d(x, weight) 6707 >>> print(output.shape) 6708 (10, 40, 30, 30, 30) 6709 >>> # case 6: group=1, other parameters being default. 6710 >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32) 6711 >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32) 6712 >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, group=1) 6713 >>> output = conv3d(x, weight) 6714 >>> print(output.shape) 6715 (10, 40, 30, 30, 30) 6716 >>> # case 7: All parameters are specified. 6717 >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32) 6718 >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32) 6719 >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, stride=(1, 2, 3), pad_mode="pad", 6720 ... pad=2, dilation=(1), group=1) 6721 >>> output = conv3d(x, weight) 6722 >>> print(output.shape) 6723 (10, 40, 34, 17, 12) 6724 """ 6725 6726 @prim_attr_register 6727 def __init__(self, 6728 out_channel, 6729 kernel_size, 6730 mode=1, 6731 pad_mode="valid", 6732 pad=0, 6733 stride=1, 6734 dilation=1, 6735 group=1, 6736 data_format="NCDHW"): 6737 """Initialize Conv3D""" 6738 self.init_prim_io_names(inputs=['x', 'w'], outputs=['output']) 6739 self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name) 6740 if isinstance(kernel_size, int): 6741 self.kernel_size = (kernel_size,) * 3 6742 self.add_prim_attr('kernel_size', self.kernel_size) 6743 self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=False, ret_five=True) 6744 self.add_prim_attr('strides', self.stride) 6745 target = context.get_context("device_target") 6746 if target.lower() == "ascend": 6747 self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False, 6748 ret_five=True, third_one=True) 6749 else: 6750 self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False, 6751 ret_five=True, third_one=False) 6752 self.add_prim_attr('dilations', self.dilation) 6753 validator.check_value_type('pad', pad, (int, tuple), self.name) 6754 if isinstance(pad, int): 6755 pad = (pad,) * 6 6756 if len(pad) != 6: 6757 raise ValueError(f"For '{self.name}', attr 'pad' must be an positive int number or a tuple of " 6758 f"six positive int numbers, but got {self.pad}.") 6759 validator.check_value_type('pad_mode', pad_mode, [str], self.name) 6760 self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name) 6761 self.add_prim_attr('pad_mode', self.pad_mode) 6762 6763 if self.pad_mode != 'pad' and pad != (0, 0, 0, 0, 0, 0): 6764 raise ValueError(f"For '{self.name}', the 'pad' must be zero or (0, 0, 0, 0, 0, 0) when 'pad_mode' " 6765 f"is not \"pad\", but got 'pad' is {self.pad} and 'pad_mode' is {pad_mode}.") 6766 self.add_prim_attr("pad", pad) 6767 self.padding = pad 6768 if self.pad_mode == 'pad': 6769 for item in pad: 6770 validator.check_non_negative_int(item, 'pad item', self.name) 
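        # At this point `pad` has been normalized to a 6-tuple (head, tail, top, bottom, left, right);
        # non-zero padding is only accepted when pad_mode == 'pad'.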
6771 6772 self.mode = validator.check_equal_int(mode, 1, 'mode', self.name) 6773 self.add_prim_attr('mode', self.mode) 6774 self.format = validator.check_string(data_format, ['NCDHW'], 'data_format', self.name) 6775 self.add_prim_attr('data_format', self.format) 6776 self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name) 6777 validator.check_value_type("group", group, (int,), self.name) 6778 validator.check_int_range(group, 1, out_channel, validator.INC_BOTH, "group", self.name) 6779 device_target = context.get_context("device_target") 6780 if self.out_channel % group != 0: 6781 raise ValueError("The argument 'group' should be divisible by 'out_channel'") 6782 if device_target == "Ascend" and group != 1: 6783 raise ValueError("On Ascend platform, group = 1 must be satisfied.") 6784 6785 self.group = group 6786 self.add_prim_attr('groups', self.group) 6787 self.add_prim_attr('offset_x', 0) 6788 6789 6790class Conv3DBackpropInput(Primitive): 6791 """ 6792 Computes the gradients of convolution 3D with respect to the input. 6793 6794 Args: 6795 out_channel (int): The dimension of the output. 6796 kernel_size (Union[int, tuple[int]]): The kernel size of the 3D convolution. 6797 mode (int): Modes for different convolutions. Not currently used. 6798 pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to: 6799 ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` . 6800 6801 - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output 6802 are the same when `stride` is set to ``1``. 6803 The amount of padding to is calculated by the operator internally. If the amount is even, 6804 it isuniformly distributed around the input, if it is odd, the excess amount goes 6805 to the front/right/bottom side. 6806 If this mode is set, `pad` must be 0. 6807 - ``"valid"``: No padding is applied to the input, and the output returns the maximum 6808 possible depth, height and width. Extra pixels that could not complete a full stride will 6809 be discarded. If this mode is set, `pad` must be 0. 6810 - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding 6811 in the depth, height and width dimension is determined by the `pad` parameter. 6812 If this mode is set, `pad` must be greater than or equal to 0. 6813 6814 pad (Union(int, tuple[int])): The pad value to be filled. Default: ``0`` . If `pad` is an integer, the 6815 paddings of head, tail, top, bottom, left and right are the same, equal to pad. If `pad` is a 6816 tuple of four integers, the padding of head, tail, top, bottom, left and right equal to pad[0], 6817 pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly. 6818 stride (Union(int, tuple[int])): The stride to be applied to the convolution filter. Default: ``1`` . 6819 dilation (Union(int, tuple[int])): Specifies the space to use between kernel elements. Default: ``1`` . 6820 group (int): Splits input into groups. Default: ``1`` . 6821 data_format (str): The optional value for data format. Currently only support ``'NCDHW'`` . 6822 6823 Inputs: 6824 - **weight** (Tensor) - Set size of kernel is :math:`(D_{in}, K_h, K_w)`, then the shape is 6825 :math:`(C_{out}, C_{in}, D_{in}, K_h, K_w)`. Currently weight data type only support float16 and float32. 6826 - **dout** (Tensor) - the gradients with respect to the output of the convolution. 6827 The shape conforms to the default. 6828 data_format :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`. 
Currently dout data type only support float16 6829 and float32. 6830 - **input_size** (tuple(int)) - A tuple describes the shape of the input which conforms to the format 6831 :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`. 6832 6833 Outputs: 6834 Tensor, the gradients with respect to the input of convolution 3D. It has the same shape as the input. 6835 6836 Raises: 6837 TypeError: If `out_channel` or `group` is not an int. 6838 TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int not a tuple. 6839 ValueError: If `out_channel`, `kernel_size`, `stride` or `dilation` is less than 1. 6840 ValueError: If `pad` is less than 0. 6841 ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 6842 ValueError: If `pad` is a tuple whose length is not equal to 6. 6843 ValueError: If `pad_mode` is not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0, 0, 0). 6844 ValueError: If `data_format` is not 'NCDHW'. 6845 6846 Supported Platforms: 6847 ``Ascend`` 6848 6849 Examples: 6850 >>> import numpy as np 6851 >>> import mindspore 6852 >>> from mindspore import Tensor, ops 6853 >>> dout = Tensor(np.ones([16, 32, 10, 32, 32]), mindspore.float16) 6854 >>> weight = Tensor(np.ones([32, 32, 4, 6, 2]), mindspore.float16) 6855 >>> x = Tensor(np.ones([16, 32, 13, 37, 33])) 6856 >>> conv3d_backprop_input = ops.Conv3DBackpropInput(out_channel=4, kernel_size=(4, 6, 2)) 6857 >>> output = conv3d_backprop_input(dout, weight, ops.shape(x)) 6858 >>> print(output.shape) 6859 (16, 32, 13, 37, 33) 6860 """ 6861 6862 @prim_attr_register 6863 def __init__(self, 6864 out_channel, 6865 kernel_size, 6866 mode=1, 6867 pad_mode="valid", 6868 pad=0, 6869 stride=1, 6870 dilation=1, 6871 group=1, 6872 data_format="NCDHW"): 6873 """Initialize Conv3DBackpropInput""" 6874 self.init_prim_io_names(inputs=['filter', 'out_backprop', 'input_size'], outputs=['y']) 6875 self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name) 6876 self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name) 6877 self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=True, ret_five=True) 6878 self.add_prim_attr('strides', self.stride) 6879 self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=True, ret_five=True) 6880 self.add_prim_attr('dilations', self.dilation) 6881 validator.check_value_type('pad', pad, (int, tuple), self.name) 6882 validator.check_value_type('pad_mode', pad_mode, [str], self.name) 6883 if isinstance(pad, int): 6884 pad = (pad,) * 6 6885 validator.check_equal_int(len(pad), 6, 'pad size', self.name) 6886 self.add_prim_attr("pad", pad) 6887 self.pad_list = pad 6888 6889 self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name) 6890 if self.pad_mode != 'pad' and self.pad_list != (0, 0, 0, 0, 0, 0): 6891 raise ValueError(f"For '{self.name}', the 'pad' must be (0, 0, 0, 0, 0, 0) " 6892 f"when 'pad_mode' is not \"pad\", " 6893 f"but got 'pad' is {self.pad_list} and 'pad_mode' is {self.pad_mode}.") 6894 if self.pad_mode == 'pad': 6895 for item in pad: 6896 validator.check_non_negative_int(item, 'pad item', self.name) 6897 self.add_prim_attr('pad_mode', self.pad_mode) 6898 6899 self.mode = validator.check_equal_int(mode, 1, 'mode', self.name) 6900 self.add_prim_attr('mode', self.mode) 6901 self.group = validator.check_positive_int(group, 'group', self.name) 6902 self.add_prim_attr('groups', self.group) 6903 self.format = validator.check_string(data_format, ['NCDHW'], 
'format', self.name) 6904 self.add_prim_attr('data_format', self.format) 6905 6906 6907def _deconv_output_length(input_length, kernel_size, stride_size, dilation_size): 6908 filter_size = kernel_size + (kernel_size - 1) * (dilation_size - 1) 6909 if filter_size - stride_size > 0: 6910 length = input_length * stride_size + filter_size - stride_size 6911 else: 6912 length = input_length * stride_size 6913 return length 6914 6915 6916class SparseApplyAdadelta(Primitive): 6917 r""" 6918 Updates relevant entries according to the adadelta scheme. 6919 6920 .. math:: 6921 \begin{array}{ll} \\ 6922 accum = \rho * accum + (1 - \rho) * grad^2 \\ 6923 \text{update} = \sqrt{\text{accum_update} + \epsilon} * \frac{grad}{\sqrt{accum + \epsilon}} \\ 6924 var = var - update * lr \\ 6925 \text{accum_update} = \rho * \text{accum_update} + (1 - \rho) * update^2 \\ 6926 \end{array} 6927 6928 Inputs of 'var', 'accum', 'accum_update' and 'grad' comply with the implicit type conversion rules 6929 to make the data types consistent. Besides, inputs of 'lr' and 'rho' also support implicit type conversion. 6930 If they have different data types, the lower priority data type will be converted to 6931 relatively highest priority data type. 6932 RuntimeError exception will be thrown when the data type conversion of Parameter is required. 6933 6934 Note: 6935 If there are negative values or values greater than or equal to var.shape[0] in `indices`, 6936 the behavior is undefined. Besides, this operator doesn't support duplicates in `indices`. 6937 6938 Args: 6939 epsilon (float): A small value added for numerical stability. Its value must be greater or equal to 0. 6940 use_locking (bool): If ``True`` , the `var` and `accum` tensors will be protected from being updated. 6941 Default: ``False`` . 6942 6943 Inputs: 6944 - **var** (Parameter) - Weights to be updated. With float32 or float16 data type. 6945 - **accum** (Parameter) - Accumulation to be updated. Mush have the same shape and dtype as `var`. 6946 With float32 or float16 data type. 6947 - **accum_update** (Parameter) - Accum_update to be updated. Must have the same shape and dtype as `var`. 6948 With float32 or float16 data type. 6949 - **lr** (Union[float, Tensor]) - Learning rate, must be a scalar. With float32 or float16 data type. 6950 - **rho** (Union[float, Tensor]) - Decay rate, must be a scalar. With float32 or float16 data type. 6951 - **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`. 6952 - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`. 6953 Must be one of the following types: int32, int64 and indices.shape[0] = grad.shape[0]. 6954 6955 Outputs: 6956 Tuple of 3 Tensor, the updated parameters. 6957 6958 - **var** (Tensor) - The same shape and data type as `var`. 6959 - **accum** (Tensor) - The same shape and data type as `accum`. 6960 - **accum_update** (Tensor) - The same shape and data type as `accum_update`. 6961 6962 Raises: 6963 TypeError: If `epsilon` is not a float. 6964 TypeError: If `use_locking` is not a bool. 6965 TypeError: If `var`, 'accum', 'accum_update' is not a Parameter. 6966 TypeError: If dtype of `accum`, `accum_updata`, `grad` is not same as `var`. 6967 TypeError: If dtype of `var`, `accum`, `accum_update`, `lr`, `rho` or `grad` is neither float16 nor 6968 float32. 6969 TypeError: If dtype of `indices` is neither int32 nor int64. 6970 ValueError: If `epsilon` is less than 0. 
6971 ValueError: If the shape of `accum`, `accum_updata`, `grad` is not same as `var`. 6972 ValueError: If the rank of `indices` is not equal to 1. 6973 ValueError: If shape of `indices` is not same as shape of first dimension of `grad`. 6974 6975 Supported Platforms: 6976 ``Ascend`` 6977 6978 Examples: 6979 >>> class Net(nn.Cell): 6980 ... def __init__(self,epsilon,use_locking = False): 6981 ... super(Net, self).__init__() 6982 ... self.sparse_apply_adadelta = P.SparseApplyAdadelta(epsilon,use_locking) 6983 ... self.var = Parameter(Tensor(np.array([[1.0,2.0],[2.0,3.0]]).astype(np.float32)), name="var") 6984 ... self.accum = Parameter(Tensor(np.array([[1.5,2.5],[3.5,4.5]]).astype(np.float32)), name="accum") 6985 ... self.accum_update = Parameter(Tensor(np.array([[1.2,2.4],[1.8,0.6]]).astype(np.float32)), 6986 ... name="accum_update") 6987 ... def construct(self, lr, rho, grad, indices): 6988 ... out = self.sparse_apply_adadelta(self.var, self.accum, self.accum_update, lr, rho, grad, indices) 6989 ... return out 6990 ... 6991 >>> epsilon = 1e-6 6992 >>> net = Net(epsilon) 6993 >>> lr = 0.01 6994 >>> rho = 0.2 6995 >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32)) 6996 >>> output = net(lr, rho, grad, Tensor(np.array([0,1],dtype=np.int32))) 6997 >>> print(output) 6998 (Tensor(shape=[2, 2], dtype=Float32, value= 6999 [[ 9.94611859e-01, 1.98851788e+00], 7000 [ 1.99840558e+00, 2.99478507e+00]]), Tensor(shape=[2, 2], dtype=Float32, value= 7001 [[ 3.72000009e-01, 8.91999960e-01], 7002 [ 7.08000004e-01, 1.41200006e+00]]), Tensor(shape=[2, 2], dtype=Float32, value= 7003 [[ 4.72257614e-01, 1.53470778e+00], 7004 [ 3.80338937e-01, 3.37563992e-01]])) 7005 """ 7006 7007 __mindspore_signature__ = ( 7008 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7009 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7010 sig.make_sig('accum_updata', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7011 sig.make_sig('lr', dtype=sig.sig_dtype.T1), 7012 sig.make_sig('rho', dtype=sig.sig_dtype.T1), 7013 sig.make_sig('grad', dtype=sig.sig_dtype.T), 7014 sig.make_sig('indices', dtype=sig.sig_dtype.T2), 7015 ) 7016 7017 @prim_attr_register 7018 def __init__(self, epsilon, use_locking=False): 7019 """Initialize SparseApplyAdadelta""" 7020 validator.check_value_type("epsilon", epsilon, [float], self.name) 7021 validator.check_number("epsilon", epsilon, 0.0, validator.GE, self.name) 7022 validator.check_value_type("use_locking", use_locking, [bool], self.name) 7023 7024 7025class CTCLossV2(Primitive): 7026 """ 7027 Calculates the CTC (Connectionist Temporal Classification) loss and the gradient. 7028 7029 The CTC algorithm is proposed in `Connectionist Temporal Classification: Labeling Unsegmented Sequence Data with 7030 Recurrent Neural Networks <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_. 7031 7032 .. warning:: 7033 This is an experimental API that is subject to change or deletion. 7034 7035 Args: 7036 blank (int, optional): The blank label. Default: ``0`` . 7037 reduction (str, optional): Apply specific reduction method to the output. Currently only support ``'none'``. 7038 Default: ``'none'`` . 7039 7040 zero_infinity (bool, optional): If loss is infinite, this parameter determines whether to set that loss 7041 and its correlated gradient to zero. Default: ``False`` . 
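    Conceptually, ``zero_infinity=True`` just replaces infinite losses (and the corresponding gradients) with
    zeros, which can happen when a target sequence is too long to be aligned with the input. A rough sketch of
    the idea in NumPy (illustration only)::

        import numpy as np

        loss = np.array([2.3, np.inf, 1.7], dtype=np.float32)
        loss = np.where(np.isinf(loss), 0.0, loss)   # what zero_infinity=True amounts to for the loss values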
7042 7043 Inputs: 7044 - **log_probs** (Tensor) - A tensor of shape :math:`(T, N, C)`, where :math:`T` is input length, :math:`N` is 7045 batch size and :math:`C` is number of classes (including blank). Supported dtypes: float32, float64. 7046 - **targets** (Tensor) - A tensor of shape :math:`(N, S)`, where :math:`S` is max target length, 7047 means the target sequences. Supported dtypes: int32, int64. 7048 - **input_lengths** (Union(Tuple, Tensor)) - A tuple or Tensor of shape :math:`(N)`. 7049 It means the lengths of the input. Supported dtypes: int32, int64. 7050 - **target_lengths** (Union(Tuple, Tensor)) - A tuple or Tensor of shape :math:`(N)`. 7051 It means the lengths of the target. Supported dtypes: int32, int64. 7052 7053 Outputs: 7054 - **neg_log_likelihood** (Tensor) - A loss value which is differentiable with respect to each input node. 7055 - **log_alpha** (Tensor) - The probability of possible trace of input to target. 7056 7057 Raises: 7058 TypeError: If `zero_infinity` is not a bool. 7059 TypeError: If `reduction` is not string. 7060 TypeError: If the dtype of `log_probs` is not float or double. 7061 TypeError: If the dtype of `targets`, `input_lengths` or `target_lengths` is not int32 or int64. 7062 ValueError: If the rank of `log_probs` is not 3. 7063 ValueError: If the rank of `targets` is not 2. 7064 ValueError: If the shape of `input_lengths` does not match batch_size :math:`N`. 7065 ValueError: If the shape of `target_lengths` does not match batch_size :math:`N`. 7066 TypeError: If the types of `targets`, `input_lengths` or `target_lengths` are different. 7067 ValueError: If the value of `blank` is not in range [0, C). 7068 RuntimeError: If any value of `input_lengths` is larger than (num_labels|C). 7069 RuntimeError: If any `target_lengths[i]` is not in range [0, `input_length[i]`]. 7070 7071 Supported Platforms: 7072 ``Ascend`` ``GPU`` ``CPU`` 7073 7074 Examples: 7075 >>> import numpy as np 7076 >>> from mindspore import Tensor, ops 7077 >>> from mindspore import dtype as mstype 7078 >>> log_probs = Tensor(np.array([[[0.3, 0.6, 0.6]], 7079 ... [[0.9, 0.4, 0.2]]]).astype(np.float32)) 7080 >>> targets = Tensor(np.array([[0, 1]]), mstype.int32) 7081 >>> input_lengths = Tensor(np.array([2]), mstype.int32) 7082 >>> target_lengths = Tensor(np.array([1]), mstype.int32) 7083 >>> CTCLossV2 = ops.CTCLossV2(blank=0, reduction='none', zero_infinity=False) 7084 >>> neg_log_hood, log_alpha = CTCLossV2( 7085 ... 
log_probs, targets, input_lengths, target_lengths)
        >>> print(neg_log_hood)
        [-2.2986124]
        >>> print(log_alpha)
        [[[0.3 0.3 -inf -inf -inf]
          [1.2 1.8931472 1.2 -inf -inf]]]
    """

    @prim_attr_register
    def __init__(self, blank=0, reduction="none", zero_infinity=False):
        """Initialize CTCLossV2"""
        self.init_prim_io_names(inputs=["log_probs", "targets", "input_lengths", "target_lengths"],
                                outputs=["neg_log_likelihood", "log_alpha"])
        validator.check_value_type("blank", blank, [int], self.name)
        self.add_prim_attr("blank", blank)
        validator.check_value_type("reduction", reduction, [str], self.name)
        self.reduction = reduction.lower()
        validator.check_string(self.reduction, ['none'], 'reduction', self.name)
        self.add_prim_attr("reduction", self.reduction)
        validator.check_value_type("zero_infinity", zero_infinity, [bool], self.name)
        self.add_prim_attr("zero_infinity", zero_infinity)


class CTCLossV2Grad(Primitive):
    """
    Calculates the gradient of CTC (Connectionist Temporal Classification) loss.

    The CTC algorithm is proposed in `Connectionist Temporal Classification: Labeling Unsegmented Sequence Data with
    Recurrent Neural Networks <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_.

    Args:
        blank (int): The blank label. Default: ``0`` .
        reduction (str): Apply specific reduction method to the output. Currently only ``'none'`` is supported.
            Default: ``"none"`` .
        zero_infinity (bool): Whether to set infinite losses and their correlated gradients to zero.
            Default: ``False`` .

    Inputs:
        - **grad_out** (Tensor) - Gradient renewal coefficient, a tensor of shape (N), where N is batch size.
        - **log_probs** (Tensor) - A tensor of shape (T, N, C), where T is input length, N is batch size and C is
          number of classes (including blank).
        - **targets** (Tensor) - A tensor of shape (N, S), where S is max target length, means the target sequences.
        - **input_lengths** (Union(tuple, Tensor)) - A tuple or Tensor of shape (N). It means the lengths of the input.
        - **target_lengths** (Union(tuple, Tensor)) - A tuple or Tensor of shape (N). It means the lengths of the
          target.
        - **log_alpha** (Tensor) - The probability of possible trace of input to target.
        - **neg_log_likelihood** (Tensor) - A loss value which is differentiable with respect to each input node.

    Outputs:
        - **grad** (Tensor) - The grad of Connectionist Temporal Classification Loss.

    Raises:
        TypeError: If `zero_infinity` is not a bool or `reduction` is not a str.
        TypeError: If the dtype of `log_probs` or `grad_out` is not float or double.
        TypeError: If the dtype of `targets`, `input_lengths` or `target_lengths` is not int32 or int64.
        RuntimeError: If the rank of `log_probs` is not 3.
        RuntimeError: If the rank of `targets` is not 2.
        RuntimeError: If the shape of `input_lengths` does not match batch_size N.
        RuntimeError: If the shape of `target_lengths` does not match batch_size N.
        RuntimeError: If the types of `targets`, `input_lengths`, `grad_out` or `target_lengths` are different.
        RuntimeError: If the value of `blank` is not in range [0, C).
        RuntimeError: If any value of `input_lengths` is larger than (num_labels|C).
        RuntimeError: If any target_lengths[i] is not in range [0, input_length[i]].
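    The shape and value constraints above are shared with :class:`mindspore.ops.CTCLossV2` and can be checked up
    front with a small NumPy sketch (the helper name is illustrative only)::

        import numpy as np

        def check_ctc_inputs(log_probs, targets, input_lengths, target_lengths, blank=0):
            time_steps, batch_size, num_classes = log_probs.shape        # (T, N, C)
            assert targets.ndim == 2 and targets.shape[0] == batch_size  # (N, S)
            assert input_lengths.shape == (batch_size,)
            assert target_lengths.shape == (batch_size,)
            assert 0 <= blank < num_classes
            assert np.all((0 <= target_lengths) & (target_lengths <= input_lengths))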
7146 7147 Supported Platforms: 7148 ``Ascend`` ``CPU`` 7149 """ 7150 7151 @prim_attr_register 7152 def __init__(self, blank, reduction="none", zero_infinity=False): 7153 """Initialize CTCLossV2Grad""" 7154 self.init_prim_io_names(inputs=["grad_out", "log_probs", "targets", "input_lengths", "target_lengths", 7155 "neg_log_likelihood", "log_alpha"], 7156 outputs=["grad"]) 7157 validator.check_value_type("blank", blank, [int], self.name) 7158 self.add_prim_attr("blank", blank) 7159 validator.check_value_type("reduction", reduction, [str], self.name) 7160 self.add_prim_attr("reduction", reduction) 7161 validator.check_value_type("zero_infinity", zero_infinity, [bool], self.name) 7162 self.add_prim_attr("zero_infinity", zero_infinity) 7163 7164 7165class Conv3DTranspose(Primitive): 7166 r""" 7167 Computes a 3D transposed convolution, which is also known as a deconvolution 7168 (although it is not an actual deconvolution). 7169 7170 Input is typically of shape :math:`(N, C, D, H, W)`, where :math:`N` is batch size, :math:`C` is channel number, 7171 :math:`D` is depth, :math:`H` is height, :math:`W` is width. 7172 7173 If the 'pad_mode' is set to be "pad", the depth, height and width of output are defined as: 7174 7175 .. math:: 7176 D_{out} = (D_{in} - 1) \times \text{stride}[0] - 2 \times \text{pad}[0] + \text{dilation}[0] 7177 \times (\text{kernel_size}[0] - 1) + \text{output_padding}[0] + 1 7178 7179 H_{out} = (H_{in} - 1) \times \text{stride}[1] - 2 \times \text{pad}[1] + \text{dilation}[1] 7180 \times (\text{kernel_size}[1] - 1) + \text{output_padding}[1] + 1 7181 7182 W_{out} = (W_{in} - 1) \times \text{stride}[2] - 2 \times \text{pad}[2] + \text{dilation}[2] 7183 \times (\text{kernel_size}[2] - 1) + \text{output_padding}[2] + 1 7184 7185 Note: 7186 In Ascend, only support :math:`group=1`. 7187 7188 Args: 7189 in_channel (int): The channel of the input x. 7190 out_channel (int): The channel of the weight x. 7191 kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers. 7192 Specifies the depth, height and width of the 3D convolution window. 7193 Single int means the value is for the depth, height and width of the kernel. 7194 A tuple of 3 ints means the first value is for the depth, the second value is for the height and the 7195 other is for the width of the kernel. 7196 mode (int, optional): Modes for different convolutions. Default is ``1`` . It is currently not used. 7197 pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to: 7198 ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` . 7199 7200 - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output 7201 are the same when `stride` is set to ``1``. 7202 The amount of padding to is calculated by the operator internally. If the amount is even, 7203 it isuniformly distributed around the input, if it is odd, the excess amount goes 7204 to the front/right/bottom side. 7205 If this mode is set, `pad` must be 0. 7206 - ``"valid"``: No padding is applied to the input, and the output returns the maximum 7207 possible depth, height and width. Extra pixels that could not complete a full stride will 7208 be discarded. If this mode is set, `pad` must be 0. 7209 - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding 7210 in the depth, height and width dimension is determined by the `pad` parameter. 7211 If this mode is set, `pad` must be greater than or equal to 0. 
7212 7213 pad (Union(int, tuple[int]), optional): The pad value to be filled. Default: ``0`` . If `pad` is an integer, 7214 the paddings of head, tail, top, bottom, left and right are the same, equal to pad. 7215 If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal 7216 to pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly. 7217 stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents 7218 the depth, height and width of movement are both strides, or a tuple of three int numbers that 7219 represent depth, height and width of movement respectively. Default: ``1`` . 7220 dilation (Union(int, tuple[int]), optional): Specifies the space to use between kernel elements. 7221 Default: ``1`` . 7222 group (int, optional): The number of groups into which the filter is divided. `in_channels` 7223 and `out_channels` must be divisible by `group`. Default: ``1`` . 7224 output_padding (Union(int, tuple[int]), optional): Add extra size to each dimension of the output. 7225 Default: ``0`` . 7226 data_format (str, optional): The optional value for data format. Currently only ``'NCDHW'`` is supported. 7227 Default: ``'NCDHW'``. 7228 7229 Inputs: 7230 - **dout** (Tensor) - The gradients with respect to the output of the convolution. 7231 The shape conforms to the default. 7232 data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`. Currently dout data type only supports float16 7233 and float32. 7234 - **weight** (Tensor) - Set size of kernel is :math:`(K_d, K_h, K_w)`, then the shape is 7235 :math:`(C_{in}, C_{out}//group, K_d, K_h, K_w)`. Where :math:`group` is the Args parameter, 7236 :math:`//` is the symbol for integer division. 7237 Currently weight data type only supports float16 and float32. 7238 - **bias** (Tensor) - Tensor of shape :math:`C_{out}`. Currently, only support none. Default: ``None`` . 7239 7240 Outputs: 7241 Tensor, the gradients with respect to the input of convolution 3D. 7242 Tensor of shape :math:`(N, C_{out}//group, D_{out}, H_{out}, W_{out})`, 7243 where :math:`group` is the Args parameter. 7244 7245 Raises: 7246 TypeError: If `in_channel`, `out_channel` or `group` is not an int. 7247 TypeError: If `kernel_size`, `stride`, `pad` , `dilation` or `output_padding` is neither an int not a tuple. 7248 ValueError: If `in_channel`, `out_channel`, `kernel_size`, `stride` or `dilation` is less than 1. 7249 ValueError: If `pad` is less than 0. 7250 ValueError: If `pad_mode` is not one of 'same', 'valid' nor 'pad'. 7251 ValueError: If `pad` is a tuple whose length is not equal to 6. 7252 ValueError: If `pad_mode` is not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0, 0, 0). 7253 ValueError: If `data_format` is not 'NCDHW'. 7254 TypeError: If data type of dout and weight is neither float16 nor float32. 7255 ValueError: If bias is not none. The rank of dout and weight is not 5. 
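    The depth/height/width formulas above can be checked with a one-line helper (the name is illustrative only;
    with the defaults ``pad=0``, ``stride=1``, ``dilation=1`` and ``output_padding=0`` the same arithmetic
    reproduces the Example below)::

        def transposed_out_size(in_size, kernel, stride=1, pad=0, dilation=1, output_padding=0):
            return (in_size - 1) * stride - 2 * pad + dilation * (kernel - 1) + output_padding + 1

        # Example below: dout depth 10 with kernel_size[0] = 4 -> (10 - 1) * 1 - 0 + 1 * (4 - 1) + 0 + 1 = 13,
        # and similarly 37 and 33 for the height and width, matching the printed shape (32, 3, 13, 37, 33).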
7256 7257 Supported Platforms: 7258 ``Ascend`` ``GPU`` ``CPU`` 7259 7260 Examples: 7261 >>> import mindspore 7262 >>> import numpy as np 7263 >>> from mindspore import Tensor, ops 7264 >>> dout = Tensor(np.ones([32, 16, 10, 32, 32]), mindspore.float16) 7265 >>> weight = Tensor(np.ones([16, 3, 4, 6, 2]), mindspore.float16) 7266 >>> conv3d_transpose = ops.Conv3DTranspose(in_channel=16, out_channel=3, kernel_size=(4, 6, 2)) 7267 >>> output = conv3d_transpose(dout, weight) 7268 >>> print(output.shape) 7269 (32, 3, 13, 37, 33) 7270 """ 7271 7272 @prim_attr_register 7273 def __init__(self, 7274 in_channel, 7275 out_channel, 7276 kernel_size, 7277 mode=1, 7278 pad_mode='valid', 7279 pad=0, 7280 stride=1, 7281 dilation=1, 7282 group=1, 7283 output_padding=0, 7284 data_format="NCDHW"): 7285 """Initialize Conv3DTranspose""" 7286 self.init_prim_io_names(inputs=['x', 'filter'], outputs=['output']) 7287 self.in_channel = validator.check_positive_int(in_channel, 'in_channel', self.name) 7288 self.add_prim_attr('in_channel', self.in_channel) 7289 self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name) 7290 self.add_prim_attr('out_channel', self.out_channel) 7291 self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name) 7292 if isinstance(kernel_size, int): 7293 self.kernel_size = (kernel_size,) * 3 7294 self.add_prim_attr('kernel_size', self.kernel_size) 7295 self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=False, 7296 ret_five=True) 7297 self.add_prim_attr('strides', self.stride) 7298 self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False, 7299 ret_five=True, third_one=True) 7300 self.add_prim_attr('dilations', self.dilation) 7301 validator.check_value_type('pad', pad, (int, tuple), self.name) 7302 validator.check_value_type('pad_mode', pad_mode, [str], self.name) 7303 if isinstance(pad, int): 7304 pad = (pad,) * 6 7305 if len(pad) != 6: 7306 raise ValueError(f"For '{self.name}', attr 'pad' must be an positive int number or a tuple of " 7307 f"six positive int numbers, but got {self.pad}.") 7308 self.pad_list = pad 7309 validator.check_value_type('pad_mode', pad_mode, [str], self.name) 7310 self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name) 7311 self.add_prim_attr('pad_mode', self.pad_mode) 7312 7313 if self.pad_mode != 'pad' and pad != (0, 0, 0, 0, 0, 0): 7314 raise ValueError(f"For '{self.name}', the 'pad' must be zero or (0, 0, 0, 0, 0, 0) when 'pad_mode' " 7315 f"is not \"pad\", but got 'pad' is {self.pad} and 'pad_mode' is {pad_mode}.") 7316 7317 if self.pad_mode == 'pad': 7318 for item in self.pad_list: 7319 validator.check_non_negative_int(item, 'pad item', self.name) 7320 self.add_prim_attr('pad_list', self.pad_list) 7321 self.mode = validator.check_equal_int(mode, 1, 'mode', self.name) 7322 self.add_prim_attr('mode', self.mode) 7323 validator.check_value_type("group", group, (int,), self.name) 7324 validator.check_int_range(group, 1, out_channel, validator.INC_BOTH, "group", self.name) 7325 if self.out_channel % group != 0: 7326 raise ValueError("The argument 'group' should be divisible by 'out_channel'") 7327 device_target = context.get_context("device_target") 7328 if device_target == "Ascend" and group != 1: 7329 raise ValueError("On Ascend platform, group = 1 must be satisfied.") 7330 self.group = group 7331 self.add_prim_attr('groups', self.group) 7332 7333 self.format = validator.check_string(data_format, ['NCDHW'], 
'format', self.name) 7334 self.add_prim_attr('data_format', self.format) 7335 7336 self.output_padding = _check_3d_int_or_tuple('output_padding', output_padding, self.name, 7337 allow_five=False, ret_five=True, greater_zero=False) 7338 output_padding_ = (self.output_padding[2], self.output_padding[3], self.output_padding[4]) 7339 if self.pad_mode != 'pad' and output_padding_ != (0, 0, 0): 7340 raise ValueError(f"For '{self.name}', the 'output_padding' must be zero or (0, 0, 0) " 7341 f"when 'pad_mode' is not \"pad\", but got 'output_padding' is " 7342 f"{output_padding} and 'pad_mode' is {pad_mode}.") 7343 self.add_prim_attr('output_padding', self.output_padding) 7344 validator.check_int_range(self.kernel_size[0] * self.kernel_size[1] * self.kernel_size[2], 7345 1, 343, validator.INC_BOTH, 7346 'The product of height, width and depth of kernel_size belonging [1, 343]', 7347 self.name) 7348 validator.check_int_range(self.stride[0] * self.stride[1] * self.stride[2], 1, 343, validator.INC_BOTH, 7349 'The product of height, width and depth of stride belonging [1, 343]', self.name) 7350 validator.check_int_range(self.stride[1] * self.stride[2], 1, 256, validator.INC_BOTH, 7351 'The product of height, width and depth of stride belonging [1, 256]', self.name) 7352 validator.check_int_range(self.output_padding[2], 0, max(self.dilation[2], self.stride[2]), validator.INC_LEFT, 7353 'output_padding_d belonging [0, max(stride_d, dilation_d))', self.name) 7354 validator.check_int_range(self.output_padding[3], 0, max(self.dilation[3], self.stride[3]), validator.INC_LEFT, 7355 'output_padding_h belonging [0, max(stride_h,dilation_h))', self.name) 7356 validator.check_int_range(self.output_padding[4], 0, max(self.dilation[4], self.stride[4]), validator.INC_LEFT, 7357 'output_padding_w belonging [0, max(stride_w,dilation_w))', self.name) 7358 7359 7360class Dilation2D(Primitive): 7361 r""" 7362 Computes the grayscale dilation of 4-D input and 3-D filters tensors. 7363 7364 Applies a 2D dilation over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`, 7365 where :math:`N` is batch size, :math:`H` is height, :math:`W` is width, :math:`C` is channel number. 7366 Given kernel size :math:`ks = (h_{ker}, w_{ker})`, stride :math:`s = (s_0, s_1)` and 7367 dilation :math:`d = (d_0, d_1)`, the operation is as follows: 7368 7369 .. math:: 7370 \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1} 7371 \text{input}(N_i, C_j, s_0 \times h + d_0 \times m, s_1 \times w + d_1 \times n) + \text{filter}(C_j, m, n) 7372 7373 .. warning:: 7374 This is an experimental API that is subjected to change or deletion. 7375 7376 Note: 7377 If the input data type is float32, this operator is still executed in float16 mode. 7378 7379 Args: 7380 stride (Union(int, tuple[int])): The distance of kernel moving, an int number that represents 7381 the height and width of movement are both strides, or a tuple of two int numbers that 7382 represent height and width of movement respectively, or a tuple of four int numbers when 7383 data_format is 'NCHW' represents [1, 1, stride_height, stride_width]. 7384 7385 dilation (Union(int, tuple[int])): The data type is int or a tuple of 2 integers or a tuple of 4 integers. 7386 Specifies the dilation rate to use for dilated convolution. 7387 If set to be :math:`k > 1`, there will be :math:`k - 1` pixels skipped for 7388 each sampling location. 
            Its value must be greater than or equal to 1 and bounded by
            the height and width of the input `x`.

        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .

            - ``"same"``: Pad the input around its edges so that the shape of input and output
              are the same when `stride` is set to ``1``.
              The amount of padding is calculated by the operator internally. If the amount is even, it is
              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
              possible height and width. Extra pixels that could not complete a full stride will
              be discarded.

        data_format (str, optional): The value for data format, only ``'NCHW'`` is supported at present.
            Default: ``"NCHW"`` .

    Inputs:
        - **x** (Tensor) - Input data. A 4-D Tensor, its shape must be
          :math:`(N, C_{in}, H_{in}, W_{in})`.
        - **filter** (Tensor) - A three-dimensional tensor with the same type as `x`. The shape must be
          :math:`(C_{in}, H_{filter}, W_{filter})`.

    Outputs:
        Tensor, the result of applying 2D dilation. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`, which
        is not necessarily the same as the shape of `x`; the type is the same as that of `x`.

    Raises:
        TypeError: If type of `x` or `filter` is not one of [uint8, uint16, uint32, uint64, int8, int16,
            int32, int64, float16, float32, float64].
        TypeError: If `stride` or `dilation` is not an int number or a tuple of two or four int numbers.
        ValueError: If the length of `stride` or `dilation` is neither two nor four when it is a tuple.
        ValueError: If `stride` or `dilation` is not (1, 1, height, width) when it is a tuple of four int numbers.
        ValueError: If `stride` is not in the range of [1, 255].
        ValueError: If `dilation` is less than 1.
        ValueError: If `pad_mode` is not a str of 'same', 'valid', 'SAME' or 'VALID'.
        ValueError: If `data_format` is not the str of 'NCHW'.
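
    Note:
        A hand check of the Examples below (assuming the usual 'valid' sliding-window arithmetic, not an output
        of the operator): with :math:`H_{in} = W_{in} = 32`, a :math:`3 \times 3` filter, ``stride=1`` and
        ``dilation=1``, :math:`H_{out} = (H_{in} - dilation \times (H_{filter} - 1) - 1) / stride + 1 = 30`,
        which matches the printed output shape :math:`(10, 5, 30, 30)`.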
7425 7426 Supported Platforms: 7427 ``GPU`` ``CPU`` 7428 7429 Examples: 7430 >>> x = Tensor(np.ones([10, 5, 32, 32]), mindspore.float16) 7431 >>> filter = Tensor(np.ones([5, 3, 3]), mindspore.float16) 7432 >>> dilation2d = ops.Dilation2D(stride=1, dilation=1, pad_mode='VALID') 7433 >>> output = dilation2d(x, filter) 7434 >>> print(output.shape) 7435 (10, 5, 30, 30) 7436 """ 7437 7438 @prim_attr_register 7439 def __init__(self, stride, dilation, pad_mode="SAME", data_format="NCHW"): 7440 """Initialize Dilation2D.""" 7441 self.init_prim_io_names(inputs=['x', 'filter'], outputs=['y']) 7442 7443 def _check_format_stride_or_dilation(arg_name, arg_value, prim_name, data_format): 7444 validator.check_value_type(arg_name, arg_value, (int, tuple), prim_name) 7445 if isinstance(arg_value, int): 7446 ret_value = (1, arg_value, arg_value, 1) if data_format == "NHWC" else (1, 1, arg_value, arg_value) 7447 elif len(arg_value) == 2: 7448 ret_value = (1, arg_value[0], arg_value[1], 1) if data_format == "NHWC" else \ 7449 (1, 1, arg_value[0], arg_value[1]) 7450 elif len(arg_value) == 4: 7451 if data_format == "NHWC" and (arg_value[0] != 1 or arg_value[3] != 1): 7452 raise ValueError( 7453 f"For '{prim_name}' attr '{arg_name}' should be [1, {arg_name}_height, {arg_name}_weigth, 1]" 7454 f"when data_format is 'NHWC', but got {arg_value}") 7455 if data_format == "NCHW" and (arg_value[0] != 1 or arg_value[1] != 1): 7456 raise ValueError( 7457 f"For '{prim_name}' attr '{arg_name}' should be [1, 1, {arg_name}_height, {arg_name}_weigth]" 7458 f"when data_format is 'NCHW', but got {arg_value}") 7459 ret_value = arg_value 7460 else: 7461 raise ValueError( 7462 f"For '{prim_name}' attr '{arg_name}' should be an positive int number or a tuple of two " 7463 f"or four positive int numbers, but got {arg_value}") 7464 for item in ret_value: 7465 if isinstance(item, int) and not isinstance(item, bool) and item > 0: 7466 continue 7467 raise ValueError( 7468 f"For '{prim_name}' attr '{arg_name}' should be an positive int number or a tuple of two " 7469 f"or four positive int numbers, but got {arg_value}") 7470 return ret_value 7471 7472 if data_format == 'NHWC': 7473 raise ValueError(f"For '{self.name}', NHWC format is not supported at present.") 7474 self.data_format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'data_format', self.name) 7475 self.add_prim_attr('data_format', self.data_format) 7476 self.pad_mode = validator.check_string(pad_mode, ['VALID', 'SAME', 'valid', 'same'], 'pad_mode', self.name) 7477 self.add_prim_attr('pad_mode', self.pad_mode.upper()) 7478 self.stride = _check_format_stride_or_dilation("stride", stride, self.name, self.data_format) 7479 7480 def is_in_range(x): 7481 return 1 <= x <= 255 7482 7483 if not is_in_range(self.stride[2]) or not is_in_range(self.stride[3]): 7484 raise ValueError(f'For Dilation2D, size of stride is not supported, ' 7485 f'stride should be in the range of [1, 255], ' 7486 f'but got stride_h: `{self.stride[2]}`, stride_w: `{self.stride[3]}`.') 7487 self.add_prim_attr('stride', self.stride) 7488 self.dilation = _check_format_stride_or_dilation("dilation", dilation, self.name, self.data_format) 7489 self.add_prim_attr('dilation', self.dilation) 7490 7491 7492class SoftShrink(Primitive): 7493 r""" 7494 Applies the SoftShrink function element-wise. 7495 7496 Refer to :func:`mindspore.ops.softshrink` for more details. 7497 7498 Args: 7499 lambd(float, optional): The :math:`\lambda` must be no less than zero. Default: ``0.5`` . 
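
    For reference, SoftShrink is commonly defined element-wise as below (a restatement for convenience; see
    :func:`mindspore.ops.softshrink` for the authoritative definition):

    .. math::
        \text{SoftShrink}(x) =
        \begin{cases}
        x - \lambda, & \text{ if } x > \lambda \\
        x + \lambda, & \text{ if } x < -\lambda \\
        0, & \text{ otherwise }
        \end{cases}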
7500 7501 Inputs: 7502 - **input_x** (Tensor) - The input of soft shrink with data type of float16 or float32. 7503 7504 Outputs: 7505 Tensor, has the same shape and data type as `input_x`. 7506 7507 Supported Platforms: 7508 ``Ascend`` ``GPU`` ``CPU`` 7509 7510 Examples: 7511 >>> import mindspore 7512 >>> import numpy as np 7513 >>> from mindspore import Tensor, ops 7514 >>> input_x = Tensor(np.array([[ 0.5297, 0.7871, 1.1754], [ 0.7836, 0.6218, -1.1542]]), mindspore.float16) 7515 >>> softshrink = ops.SoftShrink() 7516 >>> output = softshrink(input_x) 7517 >>> print(output) 7518 [[ 0.02979 0.287 0.676 ] 7519 [ 0.2837 0.1216 -0.6543 ]] 7520 """ 7521 7522 @prim_attr_register 7523 def __init__(self, lambd=0.5): 7524 """Initialize SoftShrink""" 7525 validator.check_value_type("lambd", lambd, [float], self.name) 7526 validator.check_number("lambd", lambd, 0, validator.GE, self.name) 7527 7528 7529class ApplyAdagradDA(Primitive): 7530 r""" 7531 Update `var` according to the proximal adagrad scheme. 7532 The Adagrad algorithm was proposed in 7533 `Adaptive Subgradient Methods for Online Learning and Stochastic Optimization 7534 <http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf>`_. 7535 7536 .. math:: 7537 \begin{array}{ll} \\ 7538 grad\_accum += grad \\ 7539 grad\_squared\_accum += grad * grad \\ 7540 tmp\_val= 7541 \begin{cases} 7542 sign(grad\_accum) * max\left \{|grad\_accum|-l1*global\_step, 0\right \} & \text{ if } l1>0 \\ 7543 grad\_accum & \text{ otherwise } \\ 7544 \end{cases} \\ 7545 x\_value = -1 * lr * tmp\_val \\ 7546 y\_value = l2 * global\_step * lr + \sqrt{grad\_squared\_accum} \\ 7547 var = \frac{ x\_value }{ y\_value } 7548 \end{array} 7549 7550 Inputs of `var`, `gradient_accumulator`, `gradient_squared_accumulator` and `grad` 7551 comply with the implicit type conversion rules to make the data types consistent. 7552 If they have different data types, the lower priority data type will be converted to 7553 the relatively highest priority data type. 7554 7555 Args: 7556 use_locking (bool): If ``True`` , updating of the `var` and `accum` tensors will be protected by a lock. 7557 Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` . 7558 7559 Inputs: 7560 - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32. 7561 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 7562 - **gradient_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_accum`. Must have the same 7563 shape as `var`. 7564 - **gradient_squared_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_squared\_accum`. 7565 Must have the same shape as `var`. 7566 - **grad** (Tensor) - A tensor for gradient. Must have the same shape as `var`. 7567 - **lr** ([Number, Tensor]) - Scaling factor. Must be a scalar. With float32 or float16 data type. 7568 - **l1** ([Number, Tensor]) - L1 regularization. Must be a scalar. With float32 or float16 data type. 7569 - **l2** ([Number, Tensor]) - L2 regularization. Must be a scalar. With float32 or float16 data type. 7570 - **global_step** ([Number, Tensor]) - Training step number. Must be a scalar. With int32 or int64 data type. 7571 7572 Outputs: 7573 Tuple of 1 Tensors, the updated parameters. 7574 7575 - **var** (Tensor) - The same shape and data type as `var`. 7576 7577 Raises: 7578 TypeError: If `var`, `gradient_accumulator` or `gradient_squared_accumulator` is not a Parameter. 7579 TypeError: If `grad` is not a Tensor. 
7580 TypeError: If `lr`, `l1`, `l2` or `global_step` is neither a Number nor a Tensor. 7581 TypeError: If use_locking is not a bool. 7582 TypeError: If dtype of `var`, `gradient_accumulator`, `gradient_squared_accumulator`, `grad`, 7583 `lr`, `l1` or `l2` is neither float16 nor float32. 7584 TypeError: If dtype of `gradient_accumulator`, `gradient_squared_accumulator` or `grad` is not same as `var`. 7585 TypeError: If dtype of `global_step` is not int32 nor int64. 7586 ValueError: If the shape size of `lr`, `l1`, `l2` and `global_step` is not 0. 7587 TypeError: If the data type of `var`, `gradient_accumulator`, `gradient_squared_accumulator` and `grad` 7588 conversion of Parameter is not supported. 7589 7590 Supported Platforms: 7591 ``Ascend`` ``GPU`` ``CPU`` 7592 7593 Examples: 7594 >>> import numpy as np 7595 >>> from mindspore import dtype as mstype 7596 >>> from mindspore import Tensor, nn, ops, Parameter 7597 >>> class ApplyAdagradDANet(nn.Cell): 7598 ... def __init__(self, use_locking=False): 7599 ... super(ApplyAdagradDANet, self).__init__() 7600 ... self.apply_adagrad_d_a = ops.ApplyAdagradDA(use_locking) 7601 ... self.var = Parameter(Tensor(np.array([[0.6, 0.4], [0.1, 0.5]]).astype(np.float32)), name="var") 7602 ... self.gradient_accumulator = Parameter(Tensor(np.array([[0.1, 0.3], 7603 ... [0.1, 0.5]]).astype(np.float32)), 7604 ... name="gradient_accumulator") 7605 ... self.gradient_squared_accumulator = Parameter(Tensor(np.array([[0.2, 0.1], 7606 ... [0.1, 0.2]]).astype(np.float32)), 7607 ... name="gradient_squared_accumulator") 7608 ... self.gradient_accumulator = Parameter(Tensor(np.array([[0.1, 0.3], 7609 ... [0.1, 0.5]]).astype(np.float32)), 7610 ... name="gradient_accumulator") 7611 ... def construct(self, grad, lr, l1, l2, global_step): 7612 ... out = self.apply_adagrad_d_a(self.var, self.gradient_accumulator, 7613 ... self.gradient_squared_accumulator, grad, lr, l1, l2, global_step) 7614 ... return out 7615 ... 7616 >>> net = ApplyAdagradDANet() 7617 >>> grad = Tensor(np.array([[0.3, 0.4], [0.1, 0.2]]).astype(np.float32)) 7618 >>> lr = Tensor(0.001, mstype.float32) 7619 >>> l1 = Tensor(0.001, mstype.float32) 7620 >>> l2 = Tensor(0.001, mstype.float32) 7621 >>> global_step = Tensor(2, mstype.int32) 7622 >>> output = net(grad, lr, l1, l2, global_step) 7623 >>> print(output) 7624 (Tensor(shape=[2, 2], dtype=Float32, value= 7625 [[-7.39064650e-04, -1.36888528e-03], 7626 [-5.96988888e-04, -1.42478070e-03]])) 7627 """ 7628 7629 __mindspore_signature__ = ( 7630 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7631 sig.make_sig('gradient_accumulator', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7632 sig.make_sig('gradient_squared_accumulator', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7633 sig.make_sig('grad', dtype=sig.sig_dtype.T), 7634 sig.make_sig('lr', dtype=sig.sig_dtype.T1), 7635 sig.make_sig('l1', dtype=sig.sig_dtype.T2), 7636 sig.make_sig('l2', dtype=sig.sig_dtype.T3), 7637 sig.make_sig('global_step', dtype=sig.sig_dtype.T4) 7638 ) 7639 7640 @prim_attr_register 7641 def __init__(self, use_locking=False): 7642 """Initialize ApplyAdagradDA""" 7643 validator.check_value_type("use_locking", use_locking, [bool], self.name) 7644 self.add_prim_attr('side_effect_mem', True) 7645 7646 7647class SparseApplyRMSProp(Primitive): 7648 r""" 7649 Update relevant entries according to the rmsprop algorithm. 7650 7651 .. 
math:: 7652 \begin{array}{ll} \\ 7653 ms = rho * ms_{t-1} + (1 - rho) * grad * grad \\ 7654 mom = momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) \\ 7655 var = var - mom 7656 \end{array} 7657 7658 Inputs of `var`, `ms`, `mom` and `grad` comply with the implicit type conversion rules 7659 to make the data types consistent. 7660 If they have different data types, the lower priority data type will be converted to 7661 the relatively highest priority data type. 7662 7663 Args: 7664 rho (float): Decay rate. The value should be between 0 and 1, otherwise the behavior is undefined. 7665 momentum (float): Momentum. The value should be greater or equal to 0, otherwise the behavior is undefined. 7666 epsilon (float): A small value added for numerical stability. The value should be greater than 0, 7667 otherwise the behavior is undefined. 7668 use_locking (bool): If ``True`` , updating of the var, ms, and mom tensors are protected by a lock; 7669 otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` . 7670 7671 Inputs: 7672 - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32. 7673 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 7674 - **ms** (Parameter) - The dict of mutable tensor ms. Must have the same shape and dtype as `var`. 7675 - **mom** (Parameter) - The dict of mutable tensor mom. Must have the same shape and dtype as `var`. 7676 - **lr** ([Number, Tensor]) - Learning rate. Must be a scalar. With float16 or float32 data type. 7677 - **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`. 7678 - **indices** (Tensor) - A tensor of indices in the first dimension of `var`, `ms` and `mom`. 7679 If there are duplicates in `indices`, the behavior is undefined. Must be one of the 7680 following types: int32, int64 and indices.shape[0] = var.shape[0]. 7681 7682 Outputs: 7683 Tuple of 3 Tensors, the updated parameters. 7684 7685 - **var** (Tensor) - The same shape and data type as `var`. 7686 - **ms** (Tensor) - The same shape and data type as `ms`. 7687 - **mom** (Tensor) - The same shape and data type as `mom`. 7688 7689 Raises: 7690 TypeError: If `var`, `ms` or `mom` is not a Parameter. 7691 TypeError: If `grad` or `indices` is not a Tensor. 7692 TypeError: If dtype of `var`, `ms`, `mom`, `lr`, `grad` is neither float16 nor float32. 7693 TypeError: If dtype of `indices` is neither int32 nor int64. 7694 TypeError: If `lr` is neither a Number or a Tensor. 7695 TypeError: If `use_locking` is not a bool. 7696 TypeError: If dtype of `epsilon`, `rho`, `momentum` is not a float. 7697 ValueError: If shape of `ms`, `mom`, `grad` is not same as `var`. 7698 ValueError: If the shape size of `lr` is not 0. 7699 ValueError: If shape of `indices` is not same as shape of first dimension of `var`. 7700 ValueError: If `epsilon` is less than or equal to 0. 7701 ValueError: If `momentum` is less than 0. 7702 ValueError: If `rho` is less than 0 or greater than 1. 7703 ValueError: If dimension of `var` is less than 1. 7704 RuntimeError: If the data type of `var`, `ms`, `mom` and `grad` conversion of Parameter is not supported. 7705 7706 Supported Platforms: 7707 ``Ascend`` ``GPU`` ``CPU`` 7708 7709 Examples: 7710 >>> class SparseApplyRMSPropNet(nn.Cell): 7711 ... def __init__(self, rho, momentum, epsilon, use_locking=False): 7712 ... super(SparseApplyRMSPropNet, self).__init__() 7713 ... 
self.sparse_apply_r_m_s_prop = P.SparseApplyRMSProp(rho, momentum, epsilon, use_locking) 7714 ... self.var = Parameter(Tensor(np.array([[0.6, 0.3], [0.1, 0.5]]).astype(np.float32)), name="var") 7715 ... self.ms = Parameter(Tensor(np.array([[0.2, 0.4], [0.1, 0.3]]).astype(np.float32)), name="ms") 7716 ... self.mom = Parameter(Tensor(np.array([[0.3, 0.1], [0.3, 0.6]]).astype(np.float32)), name="mom") 7717 ... def construct(self, lr, grad, indices): 7718 ... out = self.sparse_apply_r_m_s_prop(self.var, self.ms, self.mom, lr, grad, indices) 7719 ... return out 7720 ... 7721 >>> rho = 0.2 7722 >>> momentum = 0.01 7723 >>> epsilon = 1e-6 7724 >>> net = SparseApplyRMSPropNet(rho, momentum, epsilon) 7725 >>> lr = 0.01 7726 >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32)) 7727 >>> indices = Tensor(np.array([0, 1], dtype=np.int32)) 7728 >>> out = net(lr, grad, indices) 7729 >>> print(out) 7730 (Tensor(shape=[2, 2], dtype=Float32, value= 7731 [[ 5.88035822e-01, 2.88811117e-01], 7732 [ 9.10239667e-02, 4.83422279e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 7733 [[ 1.12000003e-01, 4.72000003e-01], 7734 [ 2.80000009e-02, 5.72000027e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 7735 [[ 1.19641740e-02, 1.11888833e-02], 7736 [ 8.97603668e-03, 1.65777095e-02]])) 7737 """ 7738 7739 __mindspore_signature__ = ( 7740 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7741 sig.make_sig('ms', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7742 sig.make_sig('mom', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7743 sig.make_sig('lr', dtype=sig.sig_dtype.T1), 7744 sig.make_sig('grad', dtype=sig.sig_dtype.T), 7745 sig.make_sig('indices', dtype=sig.sig_dtype.T2) 7746 ) 7747 7748 @prim_attr_register 7749 def __init__(self, rho, momentum, epsilon, use_locking=False): 7750 """"Initialize SparseApplyRMSProp""" 7751 validator.check_value_type("rho", rho, [float], self.name) 7752 validator.check_value_type("momentum", momentum, [float], self.name) 7753 validator.check_value_type("epsilon", epsilon, [float], self.name) 7754 validator.check_value_type("use_locking", use_locking, [bool], self.name) 7755 self.epsilon = validator.check_number("epsilon", epsilon, 0.0, validator.GT, self.name) 7756 self.momentum = validator.check_number("momentum", momentum, 0.0, validator.GE, self.name) 7757 self.rho = validator.check_float_range(rho, 0.0, 1.0, validator.INC_BOTH, "rho", self.name) 7758 7759 7760class SparseApplyCenteredRMSProp(Primitive): 7761 r""" 7762 Update `var` according to the centered RMSProp algorithm. 7763 7764 .. math:: 7765 \begin{array}{l} 7766 \text { mean_square }=\text { decay } * \text { mean_square }+(1-\text { decay }) * 7767 \text { gradient }^{2} \\ 7768 \text { mean_grad }=\text { decay } * \text { mean_grad }+(1-\text { decay }) * 7769 \text { gradient } \\ 7770 \text { Delta }=l r * \frac{\text { gradient }}{\sqrt{\text { mean_square }+ 7771 \text { epsilon-mean_grad }^{2}}} \\ 7772 \text { ms }<-\text { rho } * \text { ms }_{t-1}+(1-\text { rho }) * \text { grad } * \text { grad } \\ 7773 \text { mom }<-\text { momentum } * \text { mom }_{t-1}+\operatorname{lr} * 7774 \frac{\text { grad }}{\sqrt{\text { ms+epsilon }}} \\ 7775 \text { var }<-\text { var }-\text { mom } 7776 \end{array} 7777 7778 .. warning:: 7779 In dense implementation of this algorithm, `mean_gradient`, `mean_square`, and `moment` will update 7780 even if the `grad` is zero. 
But in this sparse implementation, `mean_gradient`, `mean_square`, and `moment` 7781 will not update in iterations during which the `grad` is zero. 7782 7783 Args: 7784 use_locking (bool): If ``True`` , updating of the `var`, `mg`, `ms`, and `mom` tensors will be protected by a 7785 lock. Otherwise the behavior is undefined, but may exhibit less contention. 7786 Default: ``False`` . 7787 7788 Inputs: 7789 - **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32, int64, 7790 uint8, uint16, uint32, uint64, float16, float32 or float64. 7791 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 7792 - **mg** (Parameter) - Mean gradients. Must have the same shape and dtype as `var`. 7793 - **ms** (Parameter) - Mean square gradients. Must have the same shape and dtype as `var`. 7794 - **mom** (Parameter) - Delta of `var`. Must have the same shape and dtype as `var`. 7795 - **lr** (Union[Number, Tensor]) - Learning rate. Must be a float number or a scalar tensor. 7796 Must have the same type as `var`. 7797 - **rho** (Union[Number, Tensor]) - Decay rate. Must be a float number or a scalar tensor. 7798 Must have the same type as `var`. 7799 - **momentum** (Union[Number, Tensor]) - Momentum. Must be a float number or a scalar tensor. 7800 Must have the same type as `var`. 7801 - **epsilon** (Union[Number, Tensor]) - Ridge term. Must be a float number or a scalar tensor. 7802 Must have the same type as `var`. 7803 - **grad** (Tensor) - A tensor of the same type as `var` and grad.shape[1:] = var.shape[1:] if rank(var) > 1. 7804 - **indices** (Tensor) - Gradient indices. Must be one of the following types: int32, int64. 7805 and indices.shape[0] = grad.shape[0]. 7806 7807 Outputs: 7808 - **var** (Tensor) - Tensor, has the same shape and data type as `var`. 7809 7810 Raises: 7811 TypeError: If `use_locking` is not a bool. 7812 TypeError: If `var`, `mg`, `ms`, `mom`, `grad`, `indices` is not a Tensor. 7813 TypeError: If `lr`, `rho`, `momentum` or `epsilon` is neither a Number nor a Tensor. 7814 TypeError: If dtype of `var`, `mg`, `ms`, `mom`, `lr`, `rho`, `momentum`, `epsilon` or `grad` 7815 is neither float16 nor float32. 7816 TypeError: If dtype of `mg`, `ms`, `mom`, `grad` is not same as `var`. 7817 TypeError: If dtype of `indices` is not int32 or int64. 7818 ValueError: If shape of `mg`, `ms` or `mom` is not same as `var`. 7819 ValueError: If the rank of `indices` is not equal to 1. 7820 ValueError: If dimension of `grad` is not equal or greater than 1. 7821 ValueError: If shape of `indices` is not same as shape of first dimension of `grad`. 7822 ValueError: If shape of `grad` is not same as shape of `var` except first dimension. 
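
    Note:
        A hand check of ``output[0][0]`` in the Examples below (worked by hand from the update rule above; since
        ``rho`` is almost zero, only leading terms are kept): :math:`mg \approx grad = 0.3`,
        :math:`ms \approx grad^2 = 0.09`, so :math:`\sqrt{ms - mg^2 + \epsilon} \approx \sqrt{0.01} = 0.1`; then
        :math:`mom = 0.001 \times 0.2 + 0.001 \times 0.3 / 0.1 = 0.0032` and
        :math:`var = 0.6 - 0.0032 = 0.5968`, which matches the first printed element.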
7823 7824 Supported Platforms: 7825 ``Ascend`` ``GPU`` ``CPU`` 7826 7827 Examples: 7828 >>> var = Tensor(np.array([[0.6, 0.4], [0.1, 0.5]]).astype(np.float32)) 7829 >>> mg = Tensor(np.array([[0.1, 0.3], [0.1, 0.5]]).astype(np.float32)) 7830 >>> ms = Tensor(np.array([[0.2, 0.1], [0.1, 0.2]]).astype(np.float32)) 7831 >>> mom = Tensor(np.array([[0.2, 0.1], [0.1, 0.2]]).astype(np.float32)) 7832 >>> lr = Tensor(0.001, mstype.float32) 7833 >>> rho = Tensor(1e-10, mstype.float32) 7834 >>> momentum = Tensor(0.001, mstype.float32) 7835 >>> epsilon = Tensor(0.01, mstype.float32) 7836 >>> grad = Tensor(np.array([[0.3, 0.4], [0.1, 0.2]]).astype(np.float32)) 7837 >>> indices = Tensor(np.array([0, 1]).astype(np.int32)) 7838 >>> sparse_apply_centered_rms_prop = nn_ops.SparseApplyCenteredRMSProp() 7839 >>> output = sparse_apply_centered_rms_prop(var, mg, ms, mom, lr, rho, momentum, epsilon, grad, indices) 7840 >>> print(output) 7841 [[0.5968 0.3959] 7842 [0.0989 0.4978]] 7843 """ 7844 7845 __mindspore_signature__ = ( 7846 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7847 sig.make_sig('mg', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7848 sig.make_sig('ms', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7849 sig.make_sig('mom', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7850 sig.make_sig('lr', dtype=sig.sig_dtype.T), 7851 sig.make_sig('rho', dtype=sig.sig_dtype.T), 7852 sig.make_sig('momentum', dtype=sig.sig_dtype.T), 7853 sig.make_sig('epsilon', dtype=sig.sig_dtype.T), 7854 sig.make_sig('grad', dtype=sig.sig_dtype.T), 7855 sig.make_sig('indices', dtype=sig.sig_dtype.T1) 7856 ) 7857 7858 @prim_attr_register 7859 def __init__(self, use_locking=False): 7860 """Initialize SparseApplyCenteredRMSProp.""" 7861 self.init_prim_io_names(inputs=['var', 'mg', 'ms', 'mom', 'lr', 'rho', 'momentum', 7862 'epsilon', 'grad', 'indices'], 7863 outputs=['var']) 7864 validator.check_value_type("use_locking", use_locking, [bool], self.name) 7865 7866 7867class ApplyKerasMomentum(Primitive): 7868 r""" 7869 Update `var` according to the momentum scheme. 7870 7871 .. math:: 7872 \begin{array}{ll} \\ 7873 accum = accum * momentum - grad * lr \\ 7874 var = 7875 \begin{cases} 7876 var + accum * momentum - grad * lr, &\text{if use_nesterov} \\ 7877 var + accum, &\text{else} 7878 \end{cases} 7879 \end{array} 7880 7881 Refer to the paper `On the importance of initialization and momentum in deep 7882 learning <https://dl.acm.org/doi/10.5555/3042817.3043064>`_ for more details. 7883 7884 Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules 7885 to make the data types consistent. 7886 If they have different data types, the lower priority data type will be converted to 7887 relatively highest priority data type. 7888 RuntimeError exception will be thrown when the data type conversion of Parameter is required. 7889 7890 Args: 7891 use_locking (bool): If ``True`` , updating of the `var` and `accum` tensors will be protected by a lock; 7892 Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` . 7893 use_nesterov (bool): If ``True`` , the tensor passed to compute grad will be var + momentum * accum, 7894 so in the end, the var you get is actually var + momentum * accum. Default: ``False`` . 7895 7896 Inputs: 7897 - **var** (Parameter) - Variable to be updated. With float16 or float32 data type. 7898 - **accum** (Parameter) - Must have the same shape and type as `var`. With float16 or float32 data type. 7899 - **lr** (Union[Number, Tensor]) - Scaling factor. 
Must be a scalar. With float16 or float32 data type. 7900 - **grad** (Tensor) - The gradient. Must have the same shape and type as `var`. 7901 With float16 or float32 data type. 7902 - **momentum** (Union[Number, Tensor]) - Momentum. Must be a scalar. With float16 or float32 data type. 7903 7904 Outputs: 7905 Tuple of 2 Tensors, the updated parameters. 7906 7907 - **var** (Tensor) - The same shape and data type as `var`. 7908 - **accum** (Tensor) - The same shape and data type as `accum`. 7909 7910 Raises: 7911 TypeError: If the use_locking or use_nesterov is not a bool. 7912 TypeError: If `var` or `accum` is not a Parameter. 7913 TypeError: If `lr` is neither a Number nor a Tensor. 7914 TypeError: If `grad` is not a Tensor. 7915 TypeError: If `momentum` is neither a Number nor a Tensor. 7916 TypeError: If dtype of `var`, `accum`, `lr`, `grad`, `momentum` is neither float16 nor float32. 7917 ValueError: If `accum` or `grad` doesn't have the same shape as `var`. 7918 ValueError: If the shape size of `lr`, `momentum` is not 0. 7919 7920 Supported Platforms: 7921 ``Ascend`` 7922 7923 Examples: 7924 >>> class ApplyKerasMomentumNet(nn.Cell): 7925 ... def __init__(self, use_locking=False, use_nesterov=False): 7926 ... super(ApplyKerasMomentumNet, self).__init__() 7927 ... self.apply_keras_momentum = P.ApplyKerasMomentum(use_locking, use_nesterov) 7928 ... self.var = Parameter(Tensor(np.array([[0.2, 0.3], [0.1, 0.4]]).astype(np.float32)), name="var") 7929 ... self.accum = Parameter(Tensor(np.array([[0.2, 0.3], [0.1, 0.4]]).astype(np.float32)), name="accum") 7930 ... def construct(self, lr, grad, momentum): 7931 ... out = self.apply_keras_momentum(self.var, self.accum, lr, grad, momentum) 7932 ... return out 7933 ... 7934 >>> net = ApplyKerasMomentumNet() 7935 >>> lr = Tensor(0.001, mstype.float32) 7936 >>> grad = Tensor(np.array([[0.3, 0.2], [0.4, 0.1]]).astype(np.float32)) 7937 >>> momentum = Tensor(0.99, mstype.float32) 7938 >>> output = net(lr, grad, momentum) 7939 >>> print(output) 7940 (Tensor(shape=[2, 2], dtype=Float32, value= 7941 [[ 3.97700012e-01, 5.96800029e-01], 7942 [ 1.98599994e-01, 7.95899987e-01]]), Tensor(shape=[2, 2], dtype=Float32, value= 7943 [[ 1.97699994e-01, 2.96800017e-01], 7944 [ 9.86000001e-02, 3.95900011e-01]])) 7945 """ 7946 7947 __mindspore_signature__ = ( 7948 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7949 sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 7950 sig.make_sig('lr', dtype=sig.sig_dtype.T1), 7951 sig.make_sig('grad', dtype=sig.sig_dtype.T), 7952 sig.make_sig('momentum', dtype=sig.sig_dtype.T2) 7953 ) 7954 7955 @prim_attr_register 7956 def __init__(self, use_locking=False, use_nesterov=False): 7957 """Initialize ApplyKerasMomentum""" 7958 validator.check_value_type("use_locking", use_locking, [bool], self.name) 7959 validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name) 7960 7961 7962class MultilabelMarginLoss(Primitive): 7963 r""" 7964 Creates a loss criterion that minimizes the hinge loss for multi-class 7965 classification tasks. 7966 It takes a 2D mini-batch Tensor :math:`x` as input and a 2D 7967 Tensor :math:`y` containing target class indices as output. 7968 7969 Refer to :func:`mindspore.ops.multilabel_margin_loss` for more details. 7970 7971 Args: 7972 reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , 7973 ``'sum'`` . Default: ``'mean'`` . 7974 7975 - ``'none'``: no reduction will be applied. 
7976 - ``'mean'``: compute and return the mean of elements in the output. 7977 - ``'sum'``: the output elements will be summed. 7978 7979 Inputs: 7980 - **x** (Tensor) - Predict data. Tensor of shape :math:`(C)` or :math:`(N, C)`, where :math:`N` 7981 is the batch size and :math:`C` is the number of classes. Data type must be float16 or float32. 7982 - **target** (Tensor) - Ground truth data, with the same shape as `input`, data type must be int32 and 7983 label targets padded by -1. 7984 7985 Outputs: 7986 - **y** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is ``"none"``, its shape 7987 is :math:`(N)`. Otherwise, a scalar value will be returned. 7988 - **is_target** (Tensor) - Output tensor for backward input, with the same shape as `target`, 7989 data type must be int32. 7990 7991 Supported Platforms: 7992 ``Ascend`` ``GPU`` 7993 7994 Examples: 7995 >>> import mindspore 7996 >>> import numpy as np 7997 >>> from mindspore import Tensor, ops 7998 >>> loss = ops.MultilabelMarginLoss() 7999 >>> x = Tensor(np.array([[0.1, 0.2, 0.4, 0.8], [0.2, 0.3, 0.5, 0.7]]), mindspore.float32) 8000 >>> target = Tensor(np.array([[1, 2, 0, 3], [2, 3, -1, 1]]), mindspore.int32) 8001 >>> output = loss(x, target) 8002 >>> print(output) 8003 (Tensor(shape=[], dtype=Float32, value= 0.325), Tensor(shape=[2, 4], dtype=Int32, value= 8004 [[1, 1, 1, 1], [0, 0, 1, 1]])) 8005 """ 8006 8007 @prim_attr_register 8008 def __init__(self, reduction='mean'): 8009 """Initialize MultilabelMarginLoss""" 8010 self.init_prim_io_names(inputs=['x', 'target'], outputs=['y', 'is_target']) 8011 self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name) 8012 8013 8014class ApplyAdamWithAmsgrad(Primitive): 8015 r""" 8016 Update var according to the Adam algorithm. 8017 8018 .. math:: 8019 \begin{array}{l1} \\ 8020 lr_t:=learning\_rate*\sqrt{1-\beta_2^t}/(1-\beta_1^t) \\ 8021 m_t:=\beta_1*m_{t-1}+(1-\beta_1)*g \\ 8022 v_t:=\beta_2*v_{t-1}+(1-\beta_2)*g*g \\ 8023 \hat v_t:=max(\hat v_{t-1}, v_t) \\ 8024 var:=var-lr_t*m_t/(\sqrt{\hat v_t}+\epsilon) \\ 8025 \end{array} 8026 8027 Inputs of `var`, `m`, `v`, `vhat` and `grad` comply with the implicit type conversion rules 8028 to make the data types consistent. 8029 If they have different data types, the lower priority data type will be converted to 8030 the relatively highest priority data type. 8031 8032 Inputs of `beta1_power`, `beta1`, `beta2` and `epsilon` comply with the implicit type conversion rules 8033 to make the data types consistent. 8034 If they have different data types, the lower priority data type will be converted to 8035 the relatively highest priority data type. 8036 8037 However, note that there is no implicit type conversion rule between `var` and `beta1_power`; 8038 the two sets of rules are independent of each other. 8039 8040 Args: 8041 beta1 (float): A Tensor. Must have the same type as beta1_power. Momentum factor. Must be a scalar. 8042 beta2 (float): A Tensor. Must have the same type as beta1_power. Momentum factor. Must be a scalar. 8043 epsilon (float): A Tensor. Must have the same type as beta1_power. Ridge term. Must be a scalar. 8044 use_locking (bool): use_locking: If ``True`` , updating of the `var`, `m`, and `v` tensors will 8045 be protected by a lock; Otherwise the behavior is undefined, but may exhibit less contention. 8046 Default: ``False`` . 8047 8048 Inputs: 8049 - **var** (Parameter) - Variable to be updated. The data type can be float16 or float32. 
8050 - **m** (Parameter) - The 1st moment vector in the updating formula, 8051 the shape and data type value should be the same as `var`. 8052 - **v** (Parameter) - the 2nd moment vector in the updating formula, 8053 the shape and data type value should be the same as `var`. 8054 - **vhat** (Parameter) - :math:`\hat v_t` in the updating formula, 8055 the shape and data type value should be the same as `var`. 8056 - **beta1_power** (Union[float, Tensor]) - :math:`beta_1^t(\beta_1^{t})` in the updating formula, 8057 a scalar tensor with float16 or float32 data type. 8058 - **beta2_power** (Union[float, Tensor]) - :math:`beta_2^t(\beta_2^{t})` in the updating formula, 8059 a scalar tensor with float16 or float32 data type. 8060 - **lr** (Union[float, Tensor]) - Scaling factor, a scalar tensor with float16 or float32 data type. 8061 - **grad** (Tensor) - The gradient, has the same shape and data type as `var`. 8062 8063 Outputs: 8064 Tuple of 4 Tensors, the updated parameters. 8065 8066 - **var** (Tensor) - The same shape and data type as `var`. 8067 - **m** (Tensor) - The same shape and data type as `m`. 8068 - **v** (Tensor) - The same shape and data type as `v`. 8069 - **vhat** (Tensor) - The same shape and data type as `vhat`. 8070 8071 Raises: 8072 TypeError: If `var`, `m`, `v`, `vhat` is not a Parameter. 8073 TypeError: If `beta1_power`, `beta2_power`, `lr` is neither a Number nor a Tensor. 8074 TypeError: If `grad` is not a Tensor. 8075 TypeError: If dtype of `var`, `m`, `v`, `vhat`, `beta1_power`, `beta2_power`, 8076 `lr`, `grad`, `momentum` is not float32 or float16. 8077 ValueError: If `m` or `v` or `vhat` or `grad` doesn't have the same shape of `var`. 8078 ValueError: If the shape of `beta1_power`, `beta2_power`, `lr` is not 0. 8079 8080 Supported Platforms: 8081 ``Ascend`` ``GPU`` ``CPU`` 8082 8083 Examples: 8084 >>> class ApplyAdamWithAmsgradNet(nn.Cell): 8085 ... def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, use_locking=False): 8086 ... super(ApplyAdamWithAmsgradNet, self).__init__() 8087 ... self.apply_adam_with_amsgrad = P.ApplyAdamWithAmsgrad(beta1, beta2, epsilon, use_locking) 8088 ... self.var = Parameter(Tensor(np.array([[0.2, 0.2], [0.2, 0.2]]).astype(np.float32)), name="var") 8089 ... self.m = Parameter(Tensor(np.array([[0.1, 0.2], [0.4, 0.3]]).astype(np.float32)), name="m") 8090 ... self.v = Parameter(Tensor(np.array([[0.2, 0.1], [0.3, 0.4]]).astype(np.float32)), name="v") 8091 ... self.vhat = Parameter(Tensor(np.array([[0.1, 0.2], [0.6, 0.2]]).astype(np.float32)), name="vhat") 8092 ... def construct(self, beta1_power, beta2_power, lr, grad): 8093 ... out = self.apply_adam_with_amsgrad(self.var, self.m, self.v, self.vhat, 8094 ... beta1_power, beta2_power, lr, grad) 8095 ... 
return out 8096 >>> net = ApplyAdamWithAmsgradNet() 8097 >>> grad = Tensor(np.array([[0.4, 0.2], [0.2, 0.3]]).astype(np.float32)) 8098 >>> output = net(Tensor(0.9, mstype.float32), Tensor(0.999, mstype.float32), Tensor(0.01, mstype.float32), grad) 8099 >>> print(net.var.asnumpy()) 8100 [[0.19908068 0.1985858 ] 8101 [0.19844866 0.19849943]] 8102 """ 8103 8104 __mindspore_signature__ = ( 8105 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8106 sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8107 sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8108 sig.make_sig('vhat', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8109 sig.make_sig('beta1_power', dtype=sig.sig_dtype.T1), 8110 sig.make_sig('beta2_power', dtype=sig.sig_dtype.T2), 8111 sig.make_sig('lr', dtype=sig.sig_dtype.T3), 8112 sig.make_sig('grad', dtype=sig.sig_dtype.T) 8113 ) 8114 8115 @prim_attr_register 8116 def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, use_locking=False): 8117 """Initialize ApplyAdamWithAmsgrad""" 8118 validator.check_value_type("beta1", beta1, [float], self.name) 8119 validator.check_value_type("beta2", beta2, [float], self.name) 8120 validator.check_value_type("epsilon", epsilon, [float], self.name) 8121 validator.check_value_type("use_locking", use_locking, [bool], self.name) 8122 self.add_prim_attr("side_effect_mem", True) 8123 8124 8125class ApplyAdamWithAmsgradV2(Primitive): 8126 r""" 8127 Update var according to the Adam algorithm. 8128 8129 .. math:: 8130 \begin{array}{l1} \\ 8131 lr_t:=learning\_rate*\sqrt{1-\beta_2^t}/(1-\beta_1^t) \\ 8132 m_t:=\beta_1*m_{t-1}+(1-\beta_1)*g \\ 8133 v_t:=\beta_2*v_{t-1}+(1-\beta_2)*g*g \\ 8134 \hat v_t:=\max(\hat v_{t-1}, v_t) \\ 8135 var:=var-lr_t*m_t/(\sqrt{\hat v_t}+\epsilon) \\ 8136 \end{array} 8137 8138 :math:`t` represents updating step while :math:`m` represents the 1st moment vector, 8139 :math:`v` represents the 2nd moment vector, :math:`\hat v_t` represents `vhat`, 8140 :math:`lr` represents learning rate, 8141 :math:`g` represents `grad`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, 8142 :math:`\beta_1^{t}` represents `beta1_power`, :math:`\beta_2^{t}` represents `beta2_power`, 8143 :math:`var` represents the variable to be updated, 8144 :math:`\epsilon` represents `epsilon`. 8145 8146 All of the inputs are consistent with implicit type conversion rules, 8147 which ensure that the data types are the same. If they have different data types, the lower precision data type 8148 will be converted to the data type with relatively higher precision. 8149 8150 Args: 8151 use_locking (bool): If ``True`` , updating of the `var`, `m`, and `v` tensors will 8152 be protected by a lock; Otherwise the behavior is undefined, but may exhibit less contention. 8153 Default: ``False`` . 8154 8155 Inputs: 8156 - **var** (Parameter) - Variable to be updated. The data type can be float16, float32 or float64. 8157 - **m** (Parameter) - The 1st moment vector in the updating formula, 8158 the shape should be the same as `var`. 8159 - **v** (Parameter) - The 2nd moment vector in the updating formula, 8160 the shape should be the same as `var`. 8161 - **vhat** (Parameter) - :math:`\hat v_t` in the updating formula, 8162 the shape and data type value should be the same as `var`. 8163 - **beta1_power** (Union[float, Tensor]) - :math:`beta_1^t(\beta_1^{t})` in the updating formula, 8164 with float16, float32 or float64 data type. 
8165 - **beta2_power** (Union[float, Tensor]) - :math:`beta_2^t(\beta_2^{t})` in the updating formula, 8166 with float16, float32 or float64 data type. 8167 - **lr** (Union[float, Tensor]) - Learning rate, with float16, float32 or float64 data type. 8168 - **beta1** (Union[float, Tensor]) - Exponential decay rate of the first moment. 8169 The data type can be float16, float32 or float64. 8170 - **beta2** (Union[float, Tensor]) - Exponential decay rate of the second moment. 8171 The data type can be float16, float32 or float64. 8172 - **epsilon** (Union[float, Tensor]) - A value added to the denominator to ensure numerical stability. 8173 The data type can be float16, float32 or float64. 8174 - **grad** (Tensor) - The gradient, has the same shape as `var`. 8175 8176 Outputs: 8177 Tuple of 4 Tensors, the updated parameters. 8178 8179 - **var** (Tensor) - The same shape and data type as `var`. 8180 - **m** (Tensor) - The same shape and data type as `m`. 8181 - **v** (Tensor) - The same shape and data type as `v`. 8182 - **vhat** (Tensor) - The same shape and data type as `vhat`. 8183 8184 Raises: 8185 TypeError: If `var`, `m`, `v`, `vhat` is not a Parameter. 8186 TypeError: If dtype of `var`, `m`, `v`, `vhat`, `beta1_power`, `beta2_power`, 8187 `lr`, `beta1` , `beta2` , `epsilon` or `grad` is not float64, float32 or float16. 8188 RuntimeError: If the data type of `var`, `m`, `v` , `vhat` and `grad` conversion of Parameter is not supported. 8189 8190 Supported Platforms: 8191 ``Ascend`` ``GPU`` ``CPU`` 8192 8193 Examples: 8194 >>> from mindspore import ops 8195 >>> import mindspore.nn as nn 8196 >>> from mindspore import Tensor, Parameter 8197 >>> import numpy as np 8198 >>> class ApplyAdamWithAmsgradNet(nn.Cell): 8199 ... def __init__(self, use_locking=False): 8200 ... super(ApplyAdamWithAmsgradNet, self).__init__() 8201 ... self.apply_adam_with_amsgrad = ops.ApplyAdamWithAmsgradV2(use_locking) 8202 ... self.var = Parameter(Tensor(np.array([[0.2, 0.2], [0.2, 0.2]]).astype(np.float32)), name="var") 8203 ... self.m = Parameter(Tensor(np.array([[0.1, 0.2], [0.4, 0.3]]).astype(np.float32)), name="m") 8204 ... self.v = Parameter(Tensor(np.array([[0.2, 0.1], [0.3, 0.4]]).astype(np.float32)), name="v") 8205 ... self.vhat = Parameter(Tensor(np.array([[0.1, 0.2], [0.6, 0.2]]).astype(np.float32)), name="vhat") 8206 ... self.beta1 = 0.8 8207 ... self.beta2 = 0.999 8208 ... self.epsilon = 1e-8 8209 ... self.beta1_power = 0.9 8210 ... self.beta2_power = 0.999 8211 ... self.lr = 0.01 8212 ... 8213 ... def construct(self, grad): 8214 ... out = self.apply_adam_with_amsgrad(self.var, self.m, self.v, self.vhat, 8215 ... self.beta1_power, self.beta2_power, self.lr, 8216 ... self.beta1, self.beta2, self.epsilon, grad) 8217 ... 
return out 8218 >>> net = ApplyAdamWithAmsgradNet() 8219 >>> grad = Tensor(np.array([[0.4, 0.2], [0.2, 0.3]]).astype(np.float32)) 8220 >>> output = net(grad) 8221 >>> print(net.var.asnumpy()) 8222 [[0.19886853 0.1985858 ] 8223 [0.19853032 0.19849943]] 8224 """ 8225 8226 __mindspore_signature__ = ( 8227 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8228 sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8229 sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8230 sig.make_sig('vhat', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8231 sig.make_sig('beta1_power', dtype=sig.sig_dtype.T), 8232 sig.make_sig('beta2_power', dtype=sig.sig_dtype.T), 8233 sig.make_sig('lr', dtype=sig.sig_dtype.T), 8234 sig.make_sig('beta1', dtype=sig.sig_dtype.T), 8235 sig.make_sig('beta2', dtype=sig.sig_dtype.T), 8236 sig.make_sig('epsilon', dtype=sig.sig_dtype.T), 8237 sig.make_sig('grad', dtype=sig.sig_dtype.T) 8238 ) 8239 8240 @prim_attr_register 8241 def __init__(self, use_locking=False): 8242 """Initialize ApplyAdamWithAmsgradv2""" 8243 validator.check_value_type("use_locking", use_locking, [bool], self.name) 8244 self.add_prim_attr("side_effect_mem", True) 8245 8246 8247class FractionalMaxPool(Primitive): 8248 r""" 8249 Performs fractional max pooling on the input. 8250 8251 Fractional max pooling is similar to regular max pooling, but with the added flexibility of 8252 allowing the overall reduction ratio `N` to be a non-integer value. In regular max pooling, 8253 an input set is reduced in size by taking the maximum value of `N x N` (usually 2x2) 8254 subsections of the set, with the goal of reducing the set by a factor of `N`, where `N` is an integer. 8255 8256 In contrast, fractional max pooling uses randomly generated pool sizes that are fairly uniform in size. 8257 8258 .. warning:: 8259 "pooling_ratio", currently only supports row and col dimension and should be >= 1.0, the first 8260 and last elements must be 1.0 because pooling on batch and channels dimensions is not allowed. 8261 8262 Args: 8263 pooling_ratio (list(float)): Decide the shape of output, is a list of float numbers has length >= 4. 8264 Pooling ratio for each dimension of value should not be less than 0, currently only support 8265 for row and col dimension. 8266 pseudo_random(bool, optional): Generate the pooling sequence either randomly or pseudo-randomly. 8267 If the pseudo_random parameter is set to ``True`` , the sequence will be generated in a 8268 pseudo-random fashion, otherwise it will be generated randomly. 8269 Refer to `Fractional Max-Pooling <https://arxiv.org/pdf/1412.6071>`_ 8270 by Benjamin Graham to understand the distinction between the two. 8271 Default: ``False`` . 8272 overlapping(bool, optional): When set to ``True`` , the values at the boundary of adjacent pooling cells 8273 will be shared by both cells during pooling process. When set to ``False`` , the values are not reused. 8274 Default: ``False`` . 8275 deterministic(bool, optional): If deterministic is set to ``True`` , a fixed pooling region will be used 8276 in the computation graph, ensuring that the FractionalMaxPool is deterministic. 8277 This is often used in unit tests. When set to ``False`` , fixed pool regions will not be used. 8278 Default: ``False`` . 8279 seed(int, optional): If either seed or seed2 are set to a non-zero value, the random number 8280 generator will be seeded using the specified seed. If neither seed nor seed2 are set, 8281 the generator will be seeded by a random seed. 8282 Default: ``0`` . 
8283 seed2(int, optional): The second seed to avoid seed collision. 8284 Default: ``0`` . 8285 8286 Inputs: 8287 - **x** (Tensor) -The data type must be one of the following types: float32, float64, int32, int64. 8288 Tensor of shape :math:`(N, H_{in}, W_{in}, C_{in})`. 8289 8290 Outputs: 8291 - **y** (Tensor) - the output of FractionalMaxPool, has the same data type with `x`. 8292 Tensor of shape :math:`(N, H_{out}, W_{out}, C_{out})`. 8293 8294 - **row_pooling_sequence** (Tensor) - A tensor of type int64, the result list of pool boundary rows. 8295 8296 - **col_pooling_sequence** (Tensor) - A tensor of type int64, the result list of pool boundary cols. 8297 8298 Raises: 8299 TypeError: If data type of `x` is not float32, float64, int32, int64. 8300 TypeError: If `x` is not a 4D tensor. 8301 ValueError: If element of `x` equals 0 or is less than 0. 8302 ValueError: If `pooling_ratio` is a list whose length is not equal to 4. 8303 ValueError: If the first and last element of `pooling_ratio` is not equal to 1.0. 8304 8305 Supported Platforms: 8306 ``Ascend`` ``GPU`` ``CPU`` 8307 8308 Examples: 8309 >>> x = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]).reshape([1,4,4,1]).astype(np.int64) 8310 >>> pooling_ratio=[1.0,1.5,1.5,1.0] 8311 >>> fractionalmaxpool_op = ops.FractionalMaxPool(pooling_ratio=pooling_ratio) 8312 >>> output = fractionalmaxpool_op(Tensor(x)) 8313 >>> print(output) 8314 (Tensor(shape=[1, 2, 2, 1], dtype=Int64, value= 8315 [[[[ 6], 8316 [ 8]], 8317 [[14], 8318 [16]]]]), Tensor(shape=[3], dtype=Int64, value= [0, 2, 4]), Tensor(shape=[3], dtype=Int64, value= [0, 2, 4])) 8319 """ 8320 8321 @prim_attr_register 8322 def __init__(self, pooling_ratio, pseudo_random=False, overlapping=False, deterministic=False, seed=0, seed2=0): 8323 """Initialize FractionalMaxPool.""" 8324 self.init_prim_io_names(inputs=["x"], outputs=["y", "row_pooling_sequence", "col_pooling_sequence"]) 8325 validator.check_value_type('pooling_ratio', pooling_ratio, [list], self.name) 8326 for item in pooling_ratio: 8327 validator.check_value_type("pooling_ratio_item", item, float, self.name) 8328 validator.check_value_type("pseudo_random", pseudo_random, [bool], self.name) 8329 validator.check_value_type("overlapping", overlapping, [bool], self.name) 8330 validator.check_value_type("deterministic", deterministic, [bool], self.name) 8331 validator.check_value_type("seed", seed, [int], self.name) 8332 validator.check_value_type("seed2", seed2, [int], self.name) 8333 8334 8335class FractionalMaxPool3DWithFixedKsize(Primitive): 8336 r""" 8337 Applies a 3D fractional max pooling to an input signal composed of multiple input planes. 8338 The max-pooling operation is applied in :math:`(kD, kH, kW)` regions by a stochastic step size determined by 8339 the target output size `output_shape`. 8340 8341 The number of output features is equal to the number of input planes. 8342 8343 Refer to the paper `Fractional MaxPooling by Ben Graham <https://arxiv.org/abs/1412.6071>`_ for more details. 8344 8345 The input and output data format can be "NCDHW" and "NDHWC". N is the batch size, C is the number of channels, 8346 D the feature depth, H is the feature height, and W is the feature width. 8347 8348 .. warning:: 8349 This is an experimental API that is subject to change or deletion. 8350 8351 Args: 8352 ksize (Union[float, tuple]): Size of the pooling window. `ksize` can be a tuple of three values specify a 8353 shape :math:`(k_D, k_H, k_W)`, or a single int `K` for :math:`(K, K, K)`. 
8354 output_shape (Union[int, tuple]): The target output shape. `output_shape` can be a tuple of three values 8355 specify a shape :math:`(D_{out}, H_{out}, W_{out})`, or a single float `S` for :math:`(S, S, S)`. 8356 data_format (str, optional): The optional value for data format. 8357 Currently support ``'NCDHW'`` and ``'NHDWC'`` . Default: ``'NCDHW'`` . 8358 8359 Inputs: 8360 - **x** (Tensor) - The input of FractionalMaxPool3DWithFixedKsize, which is a 4D or 5D tensor. 8361 Tensor of data type : float16, float32, double, int32, int64. 8362 Supported shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(N, D_{in}, H_{in}, W_{in}, C)`. 8363 - **random_samples** (Tensor) - The random step of FractionalMaxPool3DWithFixedKsize, which is a 3D tensor. 8364 Tensor of data type : float16, float32, double, and value is between (0, 1). 8365 Supported shape :math:`(N, C, 3)` 8366 8367 Outputs: 8368 - **y** (Tensor) - A tensor, the output of FractionalMaxPool3DWithFixedKsize. 8369 Has the same data type with `x`. 8370 Tensor of shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(N, D_{out}, H_{out}, W_{out}, C)`. 8371 - **argmax** (Tensor) - A tensor, the indices along with the outputs. 8372 Has the same shape as the `y` and int32 or int64 data type. 8373 8374 Raises: 8375 TypeError: If `input_x` is not a 4D or 5D tensor. 8376 TypeError: If `random_samples` is not a 3D tensor. 8377 TypeError: If data type of `x` is not float16, float32, double, int32, int64. 8378 TypeError: If dtype of `random_samples` is not float16, float32, double. 8379 TypeError: If dtype of `argmax` is not int32, int64. 8380 ValueError: If `output_shape` is a tuple and if `output_shape` length is not 3. 8381 ValueError: If `ksize` is a tuple and if `ksize` length is not 3. 8382 ValueError: If numbers in `output_shape` or `ksize` is not positive. 8383 ValueError: If `data_format` is neither 'NCDHW' nor 'NDHWC'. 8384 ValueError: If the first dimension size of `input_x` and `random_samples` is not equal. 8385 ValueError: If the second dimension size of `input_x` and `random_samples` is not equal. 8386 ValueError: If the third dimension size of `random_samples` is not 3. 8387 8388 Supported Platforms: 8389 ``Ascend`` ``GPU`` ``CPU`` 8390 8391 Examples: 8392 >>> import numpy as np 8393 >>> from mindspore import Tensor, ops 8394 >>> from mindspore import dtype as mstype 8395 >>> x = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]) 8396 ... .reshape([1, 1, 2, 2, 4]), mstype.float32) 8397 >>> random_samples = Tensor(np.array([0.7, 0.7, 0.7]).reshape([1, 1, 3]), mstype.float32) 8398 >>> ksize = (1, 1, 1) 8399 >>> output_shape = (1, 1, 2) 8400 >>> net = ops.FractionalMaxPool3DWithFixedKsize(ksize = ksize, output_shape = output_shape) 8401 >>> output, argmax = net(x, random_samples) 8402 >>> print(output) 8403 [[[[[13. 
16.]]]]] 8404 >>> print(argmax) 8405 [[[[[12 15]]]]] 8406 """ 8407 8408 @prim_attr_register 8409 def __init__(self, ksize, output_shape, data_format="NCDHW"): 8410 """Initialize FractionalMaxPool3DWithFixedKsize.""" 8411 self.init_prim_io_names(inputs=["x", "random_samples"], outputs=["y", "argmax"]) 8412 validator.check_value_type("ksize", ksize, [int, tuple], self.name) 8413 self.ksize = ksize 8414 if isinstance(self.ksize, int): 8415 self.ksize = (ksize, ksize, ksize) 8416 if len(self.ksize) != 3: 8417 raise ValueError(f"For '{self.name}', attr 'ksize' must be an positive int number or a tuple of " 8418 f"three positive int numbers, but got {len(self.ksize)} numbers.") 8419 for item in self.ksize: 8420 validator.check_positive_int(item, 'ksize item', self.name) 8421 self.output_shape = validator.check_value_type("output_shape", output_shape, [int, tuple], self.name) 8422 self.data_format = validator.check_string(data_format, ['NCDHW', 'NDHWC'], 'data_format', self.name) 8423 self.output_shape = _check_3d_int_or_tuple("output_shape", output_shape, 8424 self.name, allow_five=False, ret_five=False) 8425 self.add_prim_attr("ksize", self.ksize) 8426 self.add_prim_attr("output_shape", self.output_shape) 8427 8428 8429class FractionalAvgPool(Primitive): 8430 r""" 8431 Performs fractional avg pooling on the input. 8432 8433 Fractional avg pooling is similar to regular avg pooling, but with the added flexibility of 8434 allowing the overall reduction ratio `N` to be a non-integer value. In regular avg pooling, 8435 an input set is reduced in size by taking the average value of `N x N` (usually 2x2) 8436 subsections of the set, with the goal of reducing the set by a factor of `N`, where `N` is an integer. 8437 8438 .. warning:: 8439 "pooling_ratio", currently only supports row and col dimension and should be >= 1.0, the first 8440 and last elements must be 1.0 because we don't allow pooling on batch and channels dimensions. 8441 8442 Args: 8443 pooling_ratio (list(float)): Decide the shape of output, is a list of floats that has length >= 4. 8444 Pooling ratio for each dimension of value should be >=0, currently only support for row and col 8445 dimension. The first and last elements must be 1.0 because we don't allow pooling on batch and 8446 channels dimensions. 8447 pseudo_random(bool, optional): Generate the pooling sequence either randomly or pseudo-randomly. 8448 If the pseudo_random parameter is set to ``True`` , the sequence will be generated in a 8449 pseudo-random fashion, otherwise it will be generated randomly. 8450 Refer to `Fractional Max-Pooling <https://arxiv.org/pdf/1412.6071>`_ 8451 by Benjamin Graham to understand the distinction between the two. 8452 Default: ``False`` . 8453 overlapping(bool, optional): When set to ``True`` , the values at the boundary of adjacent pooling cells 8454 will be shared by both cells during pooling process. When set to ``False`` , the values are not reused. 8455 Default: ``False`` . 8456 deterministic(bool, optional): If deterministic is set to ``True`` , a fixed pooling region will be used 8457 in the computation graph, ensuring that the FractionalAvgPool is deterministic. 8458 This is often used in unit tests. When set to ``False`` , fixed pool regions will not be used. 8459 Default: ``False`` . 8460 seed(int, optional): If either seed or seed2 are set to a non-zero value, the random number 8461 generator will be seeded using the specified seed. If neither seed nor seed2 are set, 8462 the generator will be seeded by a random seed. 
8463 Default: ``0`` . 8464 seed2(int, optional): The second seed to avoid seed collision. 8465 Default: ``0`` . 8466 8467 Inputs: 8468 - **x** (Tensor) -The data type must be one of the following types: float32, float64, int32, int64. 8469 Tensor of shape :math:`(N, H_{in}, W_{in}, C_{in})`. 8470 8471 Outputs: 8472 - **y** (Tensor) - A tensor, the output of FractionalAvgPool, has the same data type with `x`. 8473 Tensor of shape :math:`(N, H_{out}, W_{out}, C_{out})`. 8474 8475 - **row_pooling_sequence** (Tensor) - A tensor of type int64, the result list of pool boundary rows. 8476 8477 - **col_pooling_sequence** (Tensor) - A tensor of type int64, the result list of pool boundary cols. 8478 8479 Raises: 8480 TypeError: If data type of `x` is not float32, float64, int32, int64. 8481 TypeError: If `x` is not a 4D tensor. 8482 ValueError: If element of `x` equals 0 or is less than 0. 8483 ValueError: If `pooling_ratio` is a list whose length is not equal to 4. 8484 ValueError: If the first and last element of `pooling_ratio` is not equal to 1.0. 8485 8486 Supported Platforms: 8487 ``Ascend`` ``GPU`` ``CPU`` 8488 8489 Examples: 8490 >>> x = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]).reshape([1,4,4,1]).astype(np.int64) 8491 >>> pooling_ratio=[1.0,1.5,1.5,1.0] 8492 >>> fractionalavgpool_op = ops.FractionalAvgPool(pooling_ratio=pooling_ratio) 8493 >>> output = fractionalavgpool_op(Tensor(x)) 8494 >>> print(output) 8495 (Tensor(shape=[1, 2, 2, 1], dtype=Int64, value= 8496 [[[[ 3], 8497 [ 5]], 8498 [[11], 8499 [13]]]]), Tensor(shape=[3], dtype=Int64, value= [0, 2, 4]), Tensor(shape=[3], dtype=Int64, value= [0, 2, 4])) 8500 """ 8501 8502 @prim_attr_register 8503 def __init__(self, pooling_ratio, pseudo_random=False, overlapping=False, deterministic=False, seed=0, seed2=0): 8504 """Initialize FractionalAvgPool.""" 8505 self.init_prim_io_names(inputs=["x"], outputs=["y", "row_pooling_sequence", "col_pooling_sequence"]) 8506 validator.check_value_type('pooling_ratio', pooling_ratio, [list], self.name) 8507 for item in pooling_ratio: 8508 validator.check_value_type("pooling_ratio_item", item, float, self.name) 8509 validator.check_value_type("pseudo_random", pseudo_random, [bool], self.name) 8510 validator.check_value_type("overlapping", overlapping, [bool], self.name) 8511 validator.check_value_type("deterministic", deterministic, [bool], self.name) 8512 validator.check_value_type("seed", seed, [int], self.name) 8513 validator.check_value_type("seed2", seed2, [int], self.name) 8514 8515 8516class NthElement(Primitive): 8517 r""" 8518 Computes the n-th smallest values for the last dimension of the input Tensor. 8519 8520 - When `input` is a 1-D Tensor (i.e. Vector), it finds the nth-smallest value in the vector 8521 and outputs its value as a scalar Tensor. 8522 - When `input` is matrices or has higher rank, it finds the nth-smallest value 8523 in each row (or vector along the last dimension) and outputs 8524 these values in a Tensor with shape of `values.shape = input.shape[:-1]`. 8525 8526 Args: 8527 reverse (bool, optional): An optional bool. If set to ``True`` , it find the :math:`n`-th largest value 8528 in the vector instead of the nth-smallest. Default: ``False`` . 8529 8530 Inputs: 8531 - **input** (Tensor) - Input Tensor with 1-D or higher dimension. 8532 - **n** (Union[int, Tensor]) - If the `n` is a Tensor, it should be a 0-D Tensor, dtype is int32. 8533 Valid range of `n` is :math:`[0, input.shape[-1])` where :math:`input.shape[-1]` is 8534 last dimension size of `input`. 

    Outputs:
        - **values** (Tensor) - Its shape satisfies: `values`.shape = `input`.shape[:-1].
          The dtype is the same as `input`.

    Raises:
        TypeError: If the type of `input` is out of the valid list.
        TypeError: If `n` is not int32 or not a Tensor.
        ValueError: If `n` is out of :math:`[0, input.shape[-1])`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> from mindspore import dtype as mstype
        >>> input = Tensor(np.array([[1, 2, 3], [4, 5, 6]]), mstype.int8)
        >>> n = 1
        >>> net = ops.NthElement()
        >>> out = net(input, n)
        >>> print(out)
        [2 5]
    """

    @prim_attr_register
    def __init__(self, reverse=False):
        """Initialize NthElement."""
        self.reverse = validator.check_value_type("reverse", reverse, [bool], self.name)
        self.add_prim_attr("reverse", self.reverse)
        self.init_prim_io_names(inputs=['input', 'n'],
                                outputs=['output'])


class PSROIPooling(Primitive):
    r"""
    Applies Position Sensitive ROI-Pooling on input Tensor.

    Args:
        spatial_scale (float): A scaling factor that maps the box coordinates to the input coordinates.
            For example, if your boxes are defined on the scale of a 224x224 image and
            your input is a 112x112 feature map (resulting from a 0.5x scaling of the original
            image), you'll want to set this to 0.5.
        group_size (int): The size of the output (in pixels) after the pooling is performed, as (height, width).
        output_dim (int): The dim of the output after the pooling is performed.

    Inputs:
        - **features** (Tensor) - The input features, whose shape must be :math:`(N, C, H, W)`, with data type of
          float16 or float32. This formula should hold: :math:`(C == output\_dim * group\_size * group\_size)`.
        - **rois** (Tensor) - The shape is `(batch, 5, rois_n)`, with data type of float16 or float32.
          The size of the first dimension `batch` is batch_size. The size of the second dimension must be `5`.
          The size of the third dimension `rois_n` is the number of rois. Along the second dimension, each roi
          is laid out as (index, x1, y1, x2, y2): the first element is the index of the roi, and the box
          coordinates in (x1, y1, x2, y2) format specify where the regions will be taken from.
          The coordinates must satisfy 0 <= x1 < x2 and 0 <= y1 < y2.

    Outputs:
        - **out** (Tensor) - The result after pooling. Its shape
          is :math:`(rois.shape[0] * rois.shape[2], output\_dim, group\_size, group\_size)`.

    Raises:
        TypeError: If `spatial_scale` is not a float.
        TypeError: If `group_size` or `output_dim` is not an int.
        TypeError: If `features` or `rois` is not a Tensor.
        TypeError: If dtype of `rois` is not float16 or float32.
        ValueError: If shape of `features` does not satisfy :math:`(C == output\_dim * group\_size * group\_size)`.
        ValueError: If `spatial_scale` is negative.

    Supported Platforms:
        ``Ascend``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> features = np.random.randn(4, 3 * 7 * 7, 80, 48)
        >>> features = Tensor.from_numpy(features).astype(mindspore.float32)
        >>> rois = Tensor.from_numpy(
        ...     np.array([[[0.0000],
        ...                [150.3563],
        ...                [200.1320],
        ...                [579.3563],
        ...                [602.3452]],
        ...               [[1.0000],
        ...                [657.1263],
        ...                [302.8564],
        ...                [762.4214],
        ...                [567.9854]],
        ...               [[2.0000],
        ...                [321.3122],
        ...                [232.2410],
        ...                
[679.0281], 8624 ... [587.6346]], 8625 ... [[3.0000], 8626 ... [664.1630], 8627 ... [387.4919], 8628 ... [778.7322], 8629 ... [562.7321]]])).astype(mindspore.float32) 8630 >>> psROIPooling = ops.PSROIPooling(spatial_scale=1.0/16, output_dim=3, 8631 ... group_size=7) 8632 >>> out = psROIPooling(features, rois) 8633 >>> print(out.shape) 8634 (4, 3, 7, 7) 8635 >>> print(out.dtype) 8636 Float32 8637 """ 8638 8639 @prim_attr_register 8640 def __init__(self, spatial_scale, group_size, output_dim): 8641 """Initialize PSROIPooling""" 8642 validator.check_positive_float(spatial_scale, "spatial_scale", self.name) 8643 validator.check_positive_int(group_size, "group_size", self.name) 8644 validator.check_positive_int(output_dim, "output_dim", self.name) 8645 self.spatial_scale = spatial_scale 8646 self.group_size = group_size 8647 self.output_dim = output_dim 8648 8649 self.add_prim_attr('spatial_scale', self.spatial_scale) 8650 self.add_prim_attr('group_size', self.group_size) 8651 self.add_prim_attr('output_dim', self.output_dim) 8652 8653 8654class TripletMarginLoss(Primitive): 8655 r""" 8656 TripletMarginLoss operation. 8657 8658 Creates a criterion that measures the triplet loss given an input 8659 tensors :math:`x1`, :math:`x2`, :math:`x3` and a margin with a value greater than :math:`0`. 8660 This is used for measuring a relative similarity between samples. A triplet 8661 is composed by `a`, `p` and `n` (i.e., `anchor`, `positive examples` and `negative 8662 examples` respectively). The shapes of all input tensors should be 8663 :math:`(N, D)`. 8664 8665 The distance swap is described in detail in the paper 8666 `Learning local feature descriptors with triplets and shallow convolutional neural 8667 networks <http://158.109.8.37/files/BRP2016.pdf>`_ 8668 by V. Balntas, E. Riba et al. 8669 8670 The loss function for each sample in the mini-batch is: 8671 8672 .. math:: 8673 L(a, p, n) = \max \{d(a_i, p_i) - d(a_i, n_i) + {\rm margin}, 0\} 8674 8675 where 8676 8677 .. math:: 8678 d(x_i, y_i) = \left\lVert {\bf x}_i - {\bf y}_i \right\rVert_p 8679 8680 Args: 8681 p (int, optional): The norm degree for pairwise distance. Default: ``2`` . 8682 eps (float, optional): Default: ``1e-6`` . 8683 swap (bool, optional): The distance swap. Default: ``False`` . 8684 reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , 8685 ``'sum'`` . Default: ``'mean'`` . 8686 8687 - ``'none'``: no reduction will be applied. 8688 - ``'mean'``: compute and return the mean of elements in the output. 8689 - ``'sum'``: the output elements will be summed. 8690 8691 Inputs: 8692 - **x** (Tensor) - A sample randomly selected from the training set. Data type must be BasicType. 8693 - **positive** (Tensor) - A sample belonging to the same category as x, 8694 with the same type and shape as `x`. 8695 - **negative** (Tensor) - A sample belonging to the different class from x, 8696 with the same type and shape as `x`. 8697 - **margin** (Tensor) - Make a margin between the positive pair and the negative pair. 8698 8699 Outputs: 8700 Union[Tensor, Scalar], if `reduction` is ``"none"``, its shape is :math:`(N)`. 8701 Otherwise, a scalar value will be returned. 8702 8703 Raises: 8704 TypeError: If `x` or `positive` or `negative` or `margin` is not a Tensor. 8705 TypeError: If dtype of `x` or `positive` or `negative` is not BasicType. 8706 TypeError: If dtype of `x`, `positive` and `negative` is not the same. 8707 TypeError: If `margin` is not float32. 8708 TypeError: If `p` is not an int. 
8709 TypeError: If `eps` is not a float. 8710 TypeError: If `swap` is not a bool. 8711 ValueError: If dimensions of input `x`, `positive` and `negative` are 8712 less than or equal to 1 at the same time. 8713 ValueError: If the dimension of input `x` or `positive` or `negative` 8714 is bigger than or equal to 8. 8715 ValueError: If length of shape of `margin` is not 0. 8716 ValueError: If shape of `x`, `positive` and `negative` cannot broadcast. 8717 ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``. 8718 8719 Supported Platforms: 8720 ``GPU`` 8721 8722 Examples: 8723 >>> import mindspore 8724 >>> import numpy as np 8725 >>> from mindspore import Tensor, ops 8726 >>> loss = ops.TripletMarginLoss() 8727 >>> x = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32) 8728 >>> positive = Tensor(np.array([[0.4, 0.6], [0.4, 0.6]]), mindspore.float32) 8729 >>> negative = Tensor(np.array([[0.2, 0.9], [0.3, 0.7]]), mindspore.float32) 8730 >>> margin = Tensor(1.0, mindspore.float32) 8731 >>> output = loss(x, positive, negative, margin) 8732 >>> print(output) 8733 0.8881968 8734 """ 8735 8736 @prim_attr_register 8737 def __init__(self, p=2, swap=False, eps=1e-6, reduction="mean"): 8738 """Initialize TripletMarginLoss""" 8739 self.init_prim_io_names(inputs=['x', 'positive', 'negative', 'margin'], outputs=['y']) 8740 validator.check_value_type("p", p, [int], self.name) 8741 validator.check_value_type("swap", swap, [bool], self.name) 8742 validator.check_value_type("eps", eps, [float], self.name) 8743 self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name) 8744 8745 8746class DeformableOffsets(Primitive): 8747 r""" 8748 Computes the deformed convolution output with the expected input. 8749 8750 Refer to :func:`mindspore.ops.deformable_conv2d` for more details. 
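
    Note:
        The following is a minimal, illustrative invocation sketch only. The shapes are assumptions borrowed
        from the example of :func:`mindspore.ops.deformable_conv2d` (with `offsets` carrying
        :math:`3 \times \text{deformable\_groups} \times K_h \times K_w` channels at the output spatial size);
        they are not additional requirements of this operator.

        >>> import numpy as np
        >>> import mindspore
        >>> from mindspore import Tensor
        >>> import mindspore.ops.operations.nn_ops as nn_ops
        >>> kh, kw = 3, 3
        >>> x = Tensor(np.ones((4, 3, 10, 10)), mindspore.float32)
        >>> offsets = Tensor(np.ones((4, 3 * kh * kw, 8, 8)), mindspore.float32)
        >>> deformable_offsets = nn_ops.DeformableOffsets(strides=(1, 1, 1, 1), pads=(0, 0, 0, 0), ksize=(kh, kw))
        >>> fm = deformable_offsets(x, offsets)

        The result `fm` rearranges the sampled feature map so that a subsequent convolution with stride equal
        to `ksize` reproduces :func:`mindspore.ops.deformable_conv2d`.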
8751 8752 Supported Platforms: 8753 ``Ascend`` ``GPU`` ``CPU`` 8754 """ 8755 8756 @prim_attr_register 8757 def __init__(self, 8758 strides, 8759 pads, 8760 ksize, 8761 dilations=(1, 1, 1, 1), 8762 data_format="NCHW", 8763 deformable_groups=1, 8764 modulated=True): 8765 """Initialize DeformableOffsets""" 8766 self.init_prim_io_names(inputs=['x', 'offsets'], outputs=['y']) 8767 8768 self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'data_format', self.name) 8769 pos_c = 1 8770 if self.format == "NHWC": 8771 pos_c = 3 8772 self.add_prim_attr('format', self.format) 8773 8774 validator.check_size_and_element_type_of_tuple('strides', strides, 4, int, self.name) 8775 if strides[0] != 1 or strides[pos_c] != 1: 8776 raise ValueError(f"For '{self.name}', The N and C dimensions of 'strides' must be set to 1.") 8777 self.add_prim_attr('strides', strides) 8778 8779 validator.check_size_and_element_type_of_tuple('pads', pads, 4, int, self.name) 8780 self.add_prim_attr('pads', pads) 8781 8782 validator.check_size_and_element_type_of_tuple('kernel_size', ksize, 2, int, self.name) 8783 self.add_prim_attr('ksize', ksize) 8784 8785 validator.check_size_and_element_type_of_tuple('dilations', dilations, 4, int, self.name) 8786 if dilations[0] != 1 or dilations[pos_c] != 1: 8787 raise ValueError(f"For '{self.name}', The N and C dimensions of 'dilations' must be set to 1.") 8788 self.add_prim_attr('dilations', dilations) 8789 8790 self.deformable_groups = validator.check_positive_int(deformable_groups, 'deformable_groups', self.name) 8791 self.add_prim_attr('deformable_groups', self.deformable_groups) 8792 8793 self.modulated = validator.check_bool(modulated, 'modulated', self.name) 8794 if self.modulated is not True: 8795 raise ValueError(f"For '{self.name}', The modulated must be set to True.") 8796 self.add_prim_attr('modulated', self.modulated) 8797 8798 8799class Pdist(Primitive): 8800 r""" 8801 Computes the p-norm distance between each pair of row vectors in the input. 8802 8803 Refer to :func:`mindspore.ops.pdist` for more details. 8804 8805 Note: 8806 The pdist operator involves exponentiation, the inf/nan calculation result may be generated 8807 when the float16 input is used. The float32 input is recommended. 8808 8809 Args: 8810 p (float, optional): The order of norm distance, :math:`p∈[0, ∞)`. Default: ``2.0`` . 8811 8812 Inputs: 8813 - **x** (Tensor) - Input tensor. Supported dtypes: float16, float32 or float64. 8814 8815 Outputs: 8816 Tensor, has the same dtype as `x`. 8817 8818 Supported Platforms: 8819 ``GPU`` ``CPU`` 8820 8821 Examples: 8822 >>> from mindspore import Tensor, ops 8823 >>> import numpy as np 8824 >>> x = Tensor(np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]).astype(np.float32)) 8825 >>> op = ops.Pdist(p=2.0) 8826 >>> y = op(x) 8827 >>> print(y) 8828 [1.4142135 2.828427 1.4142135] 8829 """ 8830 8831 @prim_attr_register 8832 def __init__(self, p=2.0): 8833 """Initialize Pdist""" 8834 validator.check_value_type("p", p, [float], self.name) 8835 if p < 0: 8836 raise ValueError('Pdist p must be a non-negative value, but got `{}`.'.format(p)) 8837 self.init_prim_io_names(inputs=['x'], outputs=['y']) 8838 8839 8840class SparseApplyAdagradDA(Primitive): 8841 r""" 8842 Update `var` according to the proximal adagrad scheme. 8843 8844 .. 
    .. math::
        \begin{array}{ll} \\
            \text{grad\_accum} += grad \\
            \text{grad\_square\_accum} += grad * grad \\
            \text{tmp\_val} = \begin{cases}
                sign(\text{grad\_accum}) * \max\left\{|\text{grad\_accum}| - l1 * \text{global\_step},
                0\right\} & \text{if } l1 > 0 \\
                \text{grad\_accum} & \text{otherwise}
            \end{cases} \\
            \text{x\_value} = -1 * lr * \text{tmp\_val} \\
            \text{y\_value} = l2 * \text{global\_step} * lr + \sqrt{\text{grad\_square\_accum}} \\
            var = \text{x\_value} / \text{y\_value}
        \end{array}

    Inputs of `var`, `grad_accum`, `grad_square_accum` and `grad`
    comply with the implicit type conversion rules to make the data types consistent.
    If they have different data types, the lower priority data type will be converted to the
    relatively highest priority data type.

    Args:
        use_locking (bool): If ``True`` , updating of the `var` and `accum` tensors will be protected by a lock.
            Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .

    Inputs:
        - **var** (Parameter) - Variable to be updated.
          The shape is :math:`(N, *)` where :math:`*` means any number of additional dimensions.
        - **grad_accum** (Parameter) - The gradient accumulator, a mutable tensor. Must have the same
          shape and dtype as `var`.
        - **grad_square_accum** (Parameter) - The accumulator of squared gradients, a mutable tensor.
          Must have the same shape and dtype as `var`.
        - **grad** (Tensor) - A tensor of the same type as `var` and grad.shape[1:] = var.shape[1:] if rank(var) > 1.
        - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
          If there are duplicates in `indices`, the behavior is undefined. Must be one of the
          following types: int32, int64 and indices.shape[0] = grad.shape[0].
        - **lr** (Union[Number, Tensor]) - Scaling factor. Must be a scalar. Must have the same type as `var`.
        - **l1** (Union[Number, Tensor]) - L1 regularization. Must be a scalar. Must have the same type as `var`.
        - **l2** (Union[Number, Tensor]) - L2 regularization. Must be a scalar. Must have the same type as `var`.
        - **global_step** (Union[Number, Tensor]) - Training step number. Must be a scalar.
          Must be one of the following types: int32, int64.

    Outputs:
        Tensor, with the same type and shape as `var`.

    Raises:
        TypeError: If `var`, `grad_accum` or `grad_square_accum` is not a Parameter.
        TypeError: If `grad` is not a Tensor.
        TypeError: If `lr`, `l1`, `l2` or `global_step` is neither a Number nor a Tensor.
        TypeError: If `use_locking` is not a bool.
        TypeError: If dtype of `grad_accum`, `grad_square_accum` or `grad` is not the same as `var`.
        TypeError: If dtype of `indices` is neither int32 nor int64.
        TypeError: If shape of `indices` is not the same as the shape of the first dimension of `grad`.
        TypeError: If dtype of `global_step` is not int64.
        ValueError: If the shape size of `lr`, `l1`, `l2` and `global_step` is not 0.
        RuntimeError: If the data type of `var`, `grad_accum`, `grad_square_accum` and `grad`
            conversion of Parameter is not supported.
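
    Note:
        As a worked instance of the update above, using the values from the example below: with `lr` = 2,
        `l1` = -1 (not positive, so no shrinkage is applied), `l2` = 1 and `global_step` = 1, the accumulators
        become grad_accum = [[7, 2], [9, 2]] and grad_square_accum = [[29, 2], [41, 2]], so
        var = -lr * grad_accum / (l2 * global_step * lr + sqrt(grad_square_accum)), which evaluates
        element-wise to the printed result.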
8898 8899 Supported Platforms: 8900 ``GPU`` ``CPU`` 8901 8902 Examples: 8903 >>> var = Parameter(Tensor(np.array([[1,2], [1,2]]).astype(np.float32))) 8904 >>> grad_accum = Parameter(Tensor(np.array([[2,1], [3,1]]).astype(np.float32))) 8905 >>> grad_square_accum = Parameter(Tensor(np.array([[4,1], [5,1]]).astype(np.float32))) 8906 >>> grad = Tensor(np.array([[5,1], [6,1]]).astype(np.float32)) 8907 >>> indices = Tensor(np.array([0, 1], dtype=np.int32)) 8908 >>> lr = Tensor(2, mstype.float32) 8909 >>> l1 = Tensor(-1, mstype.float32) 8910 >>> l2 = Tensor(1, mstype.float32) 8911 >>> global_step=Tensor(1, mstype.int64) 8912 >>> sparse_apply_adagrad_da = nn_ops.SparseApplyAdagradDA() 8913 >>> output = sparse_apply_adagrad_da(var, grad_accum, grad_square_accum, 8914 ... grad, indices, lr, l1, l2, global_step) 8915 >>> print(output) 8916 [[-1.8956923 -1.1715728] 8917 [-2.1420605 -1.1715728]] 8918 """ 8919 8920 __mindspore_signature__ = ( 8921 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8922 sig.make_sig('grad_accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8923 sig.make_sig('grad_square_accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 8924 sig.make_sig('grad', dtype=sig.sig_dtype.T), 8925 sig.make_sig('indices', dtype=sig.sig_dtype.T1), 8926 sig.make_sig('lr', dtype=sig.sig_dtype.T), 8927 sig.make_sig('l1', dtype=sig.sig_dtype.T), 8928 sig.make_sig('l2', dtype=sig.sig_dtype.T), 8929 sig.make_sig('global_step', dtype=sig.sig_dtype.T2) 8930 ) 8931 8932 @prim_attr_register 8933 def __init__(self, use_locking=False): 8934 """Initialize SparseApplyAdagradDA""" 8935 self.init_prim_io_names(inputs=['var', 'grad_accum', 'grad_square_accum', 8936 'grad', 'indices', 'lr', 'l1', 'l2', 'global_step'], 8937 outputs=['var']) 8938 validator.check_value_type("use_locking", use_locking, [bool], self.name) 8939 8940 8941class SparseApplyMomentum(Primitive): 8942 r""" 8943 Update relevant entries in '*var' and '*accum' according to the momentum scheme. 8944 8945 .. math:: 8946 \begin{array}{ll} \\ 8947 accum = accum * momentum + grad \\ 8948 var -= lr * accum 8949 \end{array} 8950 8951 Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules 8952 to make the data types consistent. 8953 If they have different data types, lower priority data type will be converted to 8954 the relatively highest priority data type. 8955 8956 Args: 8957 use_locking (bool): If ``True`` , the `var` and `accum` tensors will be protected from being updated. 8958 Default: ``False`` . 8959 use_nesterov (bool): If `True`, the tensor passed to compute grad will be var + momentum * accum, 8960 so in the end, the var you get is actually var + momentum * accum. Default: ``False`` . 8961 8962 Inputs: 8963 - **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32, int64, 8964 uint8, uint16, uint32, uint64, float16, float32 or float64. 8965 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 8966 - **accum** (Parameter) - Variable tensor to be updated, has the same shape and type as `var`. 8967 - **lr** (Union[Number, Tensor]) - The learning rate value. Must be a scalar with same type as `var`. 8968 - **grad** (Tensor) - A tensor for gradient, has the same type as `var`, 8969 and grad.shape[1:] = var.shape[1:] if rank(var) > 1. 8970 - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`. 8971 If there are duplicates in `indices`, the behavior is undefined. 
Must be one of the 8972 following types: int32, int64 and indices.shape[0] = grad.shape[0]. 8973 - **momentum** (Union[Number, Tensor]) - Momentum. Must be a scalar with same type as `var`. 8974 8975 Outputs: 8976 - **var** (Tensor) - Tensor, has the same shape and type as 'var'. 8977 8978 Raises: 8979 TypeError: If `var`, `accum`, `grad` or `indices` is not a Parameter. 8980 TypeError: If `lr`, `momentum` is neither a Number nor a Tensor. 8981 TypeError: If `use_locking` or `use_nesterov` is not a bool. 8982 TypeError: If dtype of `var`, `accum`, `lr`, `grad`, or `momentum` is not one of int8, int16, 8983 int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64. 8984 TypeError: If dtype of `indices` is neither int32 nor int64. 8985 ValueError: If the shape of `var`, `accum` or `grad` is rank 0. 8986 ValueError: If shape of `accum` or `grad` is not same as `var`. 8987 ValueError: If shape of `indices` is not same as the shape of first dimension of `grad`. 8988 ValueError: If the shape of `lr` or `momentum` is not rank 0. 8989 RuntimeError: If the data type of `var`, `accum`, `lr`, `grad` and 'momentum' conversion of Parameter 8990 is not supported. 8991 8992 Supported Platforms: 8993 ``GPU`` ``CPU`` 8994 8995 Examples: 8996 >>> import mindspore.ops.operations.nn_ops as nn_ops 8997 >>> var = Tensor(np.array([[4.1, 7.2], [1.1, 3.0]]).astype(np.float32)) 8998 >>> accum = Tensor(np.array([[2.2, 3.0], [3.1, 0.5]]).astype(np.float32)) 8999 >>> lr = Tensor(0.01, mstype.float32) 9000 >>> grad = Tensor(np.array([[0.3, 0.2], [0.4, 0.1]]).astype(np.float32)) 9001 >>> indices = Tensor(np.array([0, 1]), mstype.int32) 9002 >>> momentum = Tensor(0.99, mstype.float32) 9003 >>> sparse_apply_momentum = nn_ops.SparseApplyMomentum() 9004 >>> output = sparse_apply_momentum(var, accum, lr, grad, indices, momentum) 9005 >>> print(output) 9006 [[4.07522 7.1682997] 9007 [1.06531 2.99405 ]] 9008 """ 9009 9010 __mindspore_signature__ = ( 9011 sig.make_sig('var', dtype=sig.sig_dtype.T), 9012 sig.make_sig('accum', dtype=sig.sig_dtype.T), 9013 sig.make_sig('lr', dtype=sig.sig_dtype.T), 9014 sig.make_sig('grad', dtype=sig.sig_dtype.T), 9015 sig.make_sig('indices', dtype=sig.sig_dtype.T1), 9016 sig.make_sig('momentum', dtype=sig.sig_dtype.T) 9017 ) 9018 9019 @prim_attr_register 9020 def __init__(self, use_locking=False, use_nesterov=False): 9021 """Initialize SparseApplyMomentum""" 9022 self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'grad', 'indices', 'momentum'], 9023 outputs=['var']) 9024 validator.check_value_type("use_locking", use_locking, [bool], self.name) 9025 validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name) 9026 9027 9028class SparseApplyProximalGradientDescent(Primitive): 9029 r""" 9030 Sparse update '*var' as FOBOS algorithm with fixed learning rate. 9031 9032 .. math:: 9033 \begin{array}{ll} \\ 9034 \text{prox_v} = var - alpha \\ 9035 var = sign(\text{prox_v})/(1 + alpha * l2) * \max(\left| \text{prox_v} \right| - alpha * l1,0) 9036 \end{array} 9037 9038 Inputs of `var` and `delta` comply with the implicit type conversion rules to make the data types consistent. 9039 If they have different data types, the lower priority data type will be converted to 9040 the relatively highest priority data type. 9041 9042 Args: 9043 use_locking (bool): If ``True`` , the `var` tensors will be protected from being updated. 9044 Default: ``False`` . 9045 9046 Inputs: 9047 - **var** (Parameter) - Variable tensor to be updated. 
The data type must be int8, int16, int32, int64, 9048 uint8, uint16, uint32, uint64, float16, float32 or float64. 9049 The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions. 9050 - **alpha** (Union[Number, Tensor]) - Scaling factor. Must be a scalar with same type as `var`. 9051 - **l1** (Union[Number, Tensor]) - L1 regularization. Must be a scalar with same type as `var`. 9052 - **l2** (Union[Number, Tensor]) - l2 regularization. Must be a scalar with same type as `var`. 9053 - **grad** (Tensor) - A tensor for gradient, has the same type as `var`, 9054 and grad.shape[1:] = var.shape[1:] if rank(var) > 1. 9055 - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`. 9056 If there are duplicates in `indices`, the behavior is undefined. Must be one of the 9057 following types: int32, int64 and indices.shape[0] = grad.shape[0]. 9058 9059 Outputs: 9060 - **var** (Tensor) - Tensor, has the same shape and type as 'var'. 9061 9062 Raises: 9063 TypeError: If `var`, `grad` or `indices` is not a Parameter.. 9064 TypeError: If `alpha`, `l1`, `l2` is neither a Number nor a Tensor. 9065 TypeError: If `use_locking` is not a bool. 9066 TypeError: If dtype of `var`, `alpha`, `l1`, `l2` or `grad` is not one of int8, int16, 9067 int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64. 9068 TypeError: If dtype of `indices` is neither int32 nor int64. 9069 ValueError: If the shape of `var` or `grad` is rank 0. 9070 ValueError: If shape of `grad` is not same as `var`. 9071 ValueError: If the shape of `alpha`, `l1` or `l2` is not rank 0. 9072 ValueError: If shape of `indices` is not same as the shape of first dimension of `grad`. 9073 RuntimeError: If the data type of `var`, `alpha`, `l1`, `l2`, `grad` conversion of Parameter 9074 is not supported. 9075 9076 Supported Platforms: 9077 ``GPU`` ``CPU`` 9078 9079 Examples: 9080 >>> import mindspore.ops.operations.nn_ops as nn_ops 9081 >>> var = Tensor(np.array([[4.1, 7.2], [1.1, 3.0]]).astype(np.float32)) 9082 >>> alpha = Tensor(1.0, mstype.float32) 9083 >>> l1 = Tensor(1.0, mstype.float32) 9084 >>> l2 = Tensor(0.0, mstype.float32) 9085 >>> grad = Tensor(np.array([[1, 1], [1, 1]]).astype(np.float32)) 9086 >>> indices = Tensor(np.array([0, 1]).astype(np.int32)) 9087 >>> sparse_apply_proximal_gradient_descent = nn_ops.SparseApplyProximalGradientDescent() 9088 >>> output = sparse_apply_proximal_gradient_descent(var, alpha, l1, l2, grad, indices) 9089 >>> print(output) 9090 [[2.1 5.2] 9091 [0. 1. ]] 9092 """ 9093 9094 __mindspore_signature__ = ( 9095 sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), 9096 sig.make_sig('alpha', dtype=sig.sig_dtype.T), 9097 sig.make_sig('l1', dtype=sig.sig_dtype.T), 9098 sig.make_sig('l2', dtype=sig.sig_dtype.T), 9099 sig.make_sig('grad', dtype=sig.sig_dtype.T), 9100 sig.make_sig('indices', dtype=sig.sig_dtype.T1) 9101 ) 9102 9103 @prim_attr_register 9104 def __init__(self, use_locking=False): 9105 """Initialize SparseApplyProximalGradientDescent.""" 9106 self.init_prim_io_names(inputs=['var', 'alpha', 'l1', 'l2', 'grad', 'indices'], 9107 outputs=['var']) 9108 validator.check_value_type("use_locking", use_locking, [bool], self.name) 9109 9110 9111class NuclearNorm(Primitive): 9112 r""" 9113 Returns the matrix nuclear norm of a given Tensor. 9114 9115 Attr `dim` specifies which two dimensions of the input `x` to calculate the nuclear norm across. If `dim` is None, 9116 the nuclear norm will be calculated across all dimensions of input. 
    Because the nuclear norm is the sum of the singular values of a matrix, the input in this case should be
    2-dimensional: if the input is 2-dimensional, the nuclear norm of the input matrix is computed and `dim`
    should be None. If `dim` is set, it also needs to be in the proper range, otherwise it won't work.
    If the input is 3-dimensional or above, the attribute `dim` is required; it specifies which two dimensions
    of the input to calculate the nuclear norm across.

    According to the `dim` list, the input Tensor is reordered: the two dimensions pointed to by the attribute
    `dim` are placed at the end, and the order of the other dimensions is left unchanged. The SVD of each slice
    of the adjusted Tensor is performed to obtain the singular values, and the singular values of each
    slice/matrix are summed to obtain its nuclear norm.

    Args:
        dim (Union[list(int), tuple(int)], optional): Specifies which two
            dimensions of `x` to calculate the matrix nuclear norm
            across. If `dim` is None, the nuclear norm will be calculated across all dimensions of `x`. The length
            of `dim` should be 2. The values in `dim` should be in the range [-x_rank, x_rank), where x_rank is the
            dimension of Tensor `x`. `dim[0]` and `dim[1]` cannot point to the same dimension. Default: ``None`` .
        keepdim (bool, optional): Whether the output Tensor has `dim` retained or not. Default: ``False`` .

    Inputs:
        - **x** (Tensor) - Input to compute the matrix nuclear norm. The dimension of `x` should be greater than
          or equal to 2. Data type must be float32 or float64.

    Outputs:
        Tensor, an output Tensor with the dimensions in `dim` reduced to 1 is returned if `keepdim` is ``True``;
        otherwise a Tensor with the dimensions in `dim` removed is returned. The data type is the same as `x`.

    Raises:
        TypeError: If `x` is not a Tensor.
        TypeError: If dtype of `x` is neither float32 nor float64.
        TypeError: If dtype of `dim` is neither list(int) nor tuple(int).
        TypeError: If dtype of `keepdim` is not bool.
        ValueError: If dimension of Tensor `x` is less than 2.
        ValueError: If the length of `dim` is not 2 when `dim` is set.
        ValueError: If the dimension of Tensor `x` is not 2 when `dim` is not set.
        ValueError: If `dim[0]` and `dim[1]` point to the same dimension.
        ValueError: If `dim[0]` or `dim[1]` is not in the range [-x_rank, x_rank), where x_rank is the
            dimension of Tensor `x`.

    Supported Platforms:
        ``Ascend`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore as ms
        >>> from mindspore import Tensor
        >>> import mindspore.ops.operations.nn_ops as nn_ops
        >>> input_x = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
        ...                            
[[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]]]), ms.float32) 9161 >>> dim = [0, 2] 9162 >>> keepdim = True 9163 >>> nuclearnorm = nn_ops.NuclearNorm(dim = dim,keepdim = keepdim) 9164 >>> output = nuclearnorm(input_x) 9165 >>> print(output) 9166 [[[15.407588] 9167 [21.711605]]] 9168 >>> keepdim = False 9169 >>> nuclearnorm = nn_ops.NuclearNorm(dim = dim,keepdim = keepdim) 9170 >>> output = nuclearnorm(input_x) 9171 >>> print(output) 9172 [15.407588 21.711605] 9173 >>> dim = [0, 1] 9174 >>> keepdim = True 9175 >>> nuclearnorm = nn_ops.NuclearNorm(dim = dim,keepdim = keepdim) 9176 >>> output = nuclearnorm(input_x) 9177 >>> print(output) 9178 [[[14.212674 15.81139 17.492853]]] 9179 >>> keepdim = False 9180 >>> nuclearnorm = nn_ops.NuclearNorm(dim = dim,keepdim = keepdim) 9181 >>> output = nuclearnorm(input_x) 9182 >>> print(output) 9183 [14.212674 15.81139 17.492853] 9184 """ 9185 9186 @prim_attr_register 9187 def __init__(self, dim=None, keepdim=False): 9188 """Initialize NuclearNorm.""" 9189 validator.check_value_type("dim", dim, [list, tuple, type(None)], self.name) 9190 if dim is not None: 9191 validator.check_int(len(dim), 2, validator.EQ, 'length of dim_size', self.name) 9192 validator.check_is_int(dim[0], "dim[0]", self.name) 9193 validator.check_is_int(dim[1], "dim[1]", self.name) 9194 else: 9195 self.add_prim_attr('dim', [1000]) 9196 validator.check_value_type("keepdim", keepdim, [bool], self.name) 9197 9198 9199class GLU(Primitive): 9200 r""" 9201 Computes GLU (Gated Linear Unit activation function) of input tensors. 9202 9203 .. warning:: 9204 This is an experimental API that is subject to change or deletion. 9205 9206 Refer to :func:`mindspore.ops.glu` for more details. 9207 9208 Args: 9209 axis (int, optional): Axis on which to split the input. 9210 The value of `axis` must be an int within range [-rank(`x`), rank(`x`)). 9211 Default: ``-1`` , specifying the last dimension. 9212 9213 Inputs: 9214 - **x** (Tensor) - Input tensor. `x.shape[axis]` must be even. 9215 9216 Outputs: 9217 Tensor, has the same data type with `x`. 9218 9219 Supported Platforms: 9220 ``Ascend`` ``CPU`` 9221 9222 Examples: 9223 >>> from mindspore import ops, Tensor 9224 >>> from mindspore import dtype as mstype 9225 >>> import numpy as np 9226 >>> axis = 0 9227 >>> x = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698, 9228 ... 0.5135, 0.5740, 0.3435, 0.1895, 0.8764, 9229 ... 0.4980, 0.9673, 0.9879, 0.6988, 0.9022, 9230 ... 0.9304, 0.1558, 0.0153, 0.1559, 0.9852]).reshape([2, 2, 5]), mstype.float32) 9231 >>> glu = ops.GLU(axis=axis) 9232 >>> y = glu(x) 9233 >>> print(y) 9234 [[[0.20028052 0.6916126 0.57412136 0.06512236 0.26307625] 9235 [0.3682598 0.3093122 0.17306386 0.10212085 0.63814086]]] 9236 """ 9237 9238 @prim_attr_register 9239 def __init__(self, axis=-1): 9240 """Initialize GLU""" 9241 validator.check_value_type("axis", axis, [int], self.name) 9242 9243 9244class FractionalMaxPoolWithFixedKsize(Primitive): 9245 r""" 9246 Applies a 2D fractional max pooling to an input signal composed of multiple input planes. 9247 The max-pooling operation is applied in :math:`(kH, kW)` regions by a stochastic step size determined by 9248 the target output size `output_shape`. 9249 9250 The number of output features is equal to the number of input planes. 9251 9252 Fractional MaxPooling is described in the paper `Fractional Max-Pooling <https://arxiv.org/pdf/1412.6071>`_. 9253 9254 Args: 9255 ksize (Union[int, tuple[int]]): Size of the pooling window. 
`ksize` can be a tuple of two values 9256 specify a shape :math:`(k_H, k_W)`, or a single int `K` for :math:`(K, K)`. 9257 output_shape (Union[int, tuple[int]]): The target output shape. `output_shape` can be a 9258 tuple of two values specify a shape :math:`(H_{out}, W_{out})`, or a single float `S` for :math:`(S, S)`. 9259 data_format (str, optional): The optional value for data format, is ``'NCHW'`` . 9260 Default: ``"NCHW"`` . 9261 9262 Inputs: 9263 - **input_x** (Tensor) - Tensor of shape :math:`(N, C, H_{in}, W_{in})`, 9264 with float16, float32, float64, int32, int64 data type. 9265 - **random_samples** (Tensor) - Tensor of shape :math:`(N, C, 2)`. 9266 with float16, float32, float64 data type. 9267 9268 Outputs: 9269 - **y** (Tensor) - Has the same type as the `input_x`. 9270 Has the shape :math:`(N, C, H_{out}, W_{out})`. 9271 - **argmax** (Tensor) -A tensor whose data type must be int64. Has the same shape as the `y`. 9272 9273 Raises: 9274 TypeError: If data type of `input_x` is not one of the following: float16, float32, float64, int32, int64. 9275 TypeError: If data type of `random_samples` is not one of the following: float16, float32, float64. 9276 ValueError: If `ksize` is not a number and `ksize` is not a tuple of length 2. 9277 ValueError: If `output_shape` is not a number and `output_shape` is not a tuple of length 2. 9278 ValueError: If the sum of `ksize` , `output_shape` and 9279 -1 is larger than the corresponding dimension of `input_x`. 9280 ValueError: If the dimension of `random_samples` is not 3. 9281 ValueError: If the first dimension size of `input_x` and `random_samples` is not equal. 9282 ValueError: If the second dimension size of `input_x` and `random_samples` is not equal. 9283 ValueError: If the third dimension size of `random_samples` is not 2. 9284 9285 Supported Platforms: 9286 ``CPU`` 9287 9288 Examples: 9289 >>> # the ksize is an int number and the output_shape is a tuple. 9290 >>> ksize = 2 9291 >>> output_shape = (2,2) 9292 >>> data_format = "NCHW" 9293 >>> input_x = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698, 9294 ... 0.5135, 0.5740, 0.3435, 0.1895, 0.8764, 9295 ... 0.9581, 0.4760, 0.9014, 0.8522, 0.3664, 9296 ... 0.4980, 0.9673, 0.9879, 0.6988, 0.9022, 9297 ... 
0.9304, 0.1558, 0.0153, 0.1559, 0.9852]).reshape([1, 1, 5, 5]), mstype.float32) 9298 >>> random_samples = Tensor(np.array([[[0.8, 0.8]]]), mstype.float32) 9299 >>> net = ops.FractionalMaxPoolWithFixedKsize(ksize, output_shape, data_format) 9300 >>> y, argmax = net(input_x, random_samples) 9301 >>> print(y) 9302 [[[[0.9545 0.8764] 9303 [0.9673 0.9852]]]] 9304 >>> print(argmax) 9305 [[[[ 1 9] 9306 [16 24]]]] 9307 """ 9308 9309 @prim_attr_register 9310 def __init__(self, ksize, output_shape, data_format="NCHW"): 9311 """Initialize FractionalMaxPoolWithFixedKsize.""" 9312 validator.check_value_type('ksize', ksize, [int, tuple], self.name) 9313 self.ksize = _check_positive_int_or_tuple( 9314 "ksize", ksize, self.name, allow_four=False, ret_four=False) 9315 self.add_prim_attr("ksize", self.ksize) 9316 validator.check_value_type('output_shape', output_shape, [int, tuple], self.name) 9317 self.output_shape = _check_positive_int_or_tuple( 9318 "output_shape", output_shape, self.name, allow_four=False, ret_four=False) 9319 self.add_prim_attr("output_shape", self.output_shape) 9320 self.data_format = validator.check_string(data_format, ['NCHW'], 'data_format', self.name) 9321 self.init_prim_io_names(inputs=['input_x', 'random_samples'], outputs=['y', 'argmax']) 9322 9323 9324class ChannelShuffle(Primitive): 9325 r""" 9326 Divide the channels in a tensor of shape :math:`(*, C, H, W)` into :math:`g` group and 9327 rearrange them as :math:`(*, \frac C g, g, H*W)`, while keeping the original tensor shapes. 9328 9329 .. warning:: 9330 This is an experimental API that is subject to change or deletion. 9331 9332 Refer to :func:`mindspore.ops.channel_shuffle` for more detail. 9333 9334 Args: 9335 group (int): Number of group to divide channels in. 9336 9337 Inputs: 9338 - **x** (Tensor) - Tensor to be divided, it has shape :math:`(*, C, H, W)`, 9339 with float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 data type. 9340 9341 Outputs: 9342 A Tensor, has the same type as the `x`, and has the shape :math:`(*, C, H, W)`. 9343 9344 Supported Platforms: 9345 ``Ascend`` ``CPU`` 9346 9347 Examples: 9348 >>> import numpy as np 9349 >>> from mindspore import Tensor, ops 9350 >>> group = 2 9351 >>> x = Tensor(np.arange(1 * 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16)) 9352 >>> channel_shuffle_func = ops.ChannelShuffle(group) 9353 >>> y = channel_shuffle_func(x) 9354 >>> print(y) 9355 [[[[ 0 1] 9356 [ 2 3]] 9357 [[ 8 9] 9358 [10 11]] 9359 [[ 4 5] 9360 [ 6 7]] 9361 [[12 13] 9362 [14 15]]]] 9363 """ 9364 9365 @prim_attr_register 9366 def __init__(self, group): 9367 """Initialize ChannelShuffle""" 9368 if not isinstance(group, int): 9369 raise ValueError(f"For '{self.name}', attr 'group' must be an positive int number") 9370 self.init_prim_io_names(inputs=['x'], outputs=['y']) 9371 9372 9373class MaxPoolWithArgmaxV2(Primitive): 9374 r""" 9375 Performs max pooling on the input Tensor and returns both max values and indices. 9376 9377 Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs 9378 regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size 9379 :math:`(h_{ker}, w_{ker})` and stride :math:`(s_0, s_1)`, the operation is as follows: 9380 9381 .. math:: 9382 \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1} 9383 \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n) 9384 9385 .. warning:: 9386 This is an experimental API that is subject to change or deletion. 

    Args:
        kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and argmax
            value, is an int number that represents height and width of the kernel, or a tuple of
            two int numbers that represent height and width respectively.
        strides (Union[int, tuple[int]], optional): The distance of kernel moving, an int number that represents
            not only the height of movement but also the width of movement, or a tuple of two int numbers that
            represent height and width of movement respectively. Default: ``None`` , meaning that
            `strides = kernel_size`.
        pads (Union[int, tuple[int]], optional): The amount of padding applied to the input, an int number that
            represents the padding of both height and width, or a tuple of two int numbers that represent the
            height and width of padding respectively. Default: ``0`` .
        dilation (Union[int, tuple[int]], optional): Controls the spacing of the elements within the kernel.
            Default: ``(1, 1)`` .
        ceil_mode (bool, optional): Whether to use ceil instead of floor to calculate output shape.
            Default: ``False`` .
        argmax_type (mindspore.dtype, optional): The dtype for argmax.
            Default: ``mstype.int64`` . [Disabled in Ascend.]

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})` with data type of int8,
          int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32 or float64 on CPU and GPU,
          and of float16 on Ascend.

    Outputs:
        Tuple of 2 Tensors, representing the maxpool result and the indices where the max values are generated.

        - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
          It has the same data type as `x`.

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{pads[0]} - \text{dilation[0]}
                  \times (\text{kernel\_size[0]} - 1) - 1}{\text{strides[0]}} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{pads[1]} - \text{dilation[1]}
                  \times (\text{kernel\_size[1]} - 1) - 1}{\text{strides[1]}} + 1\right\rfloor

        - **argmax** (Tensor) - Index corresponding to the maximum value.
          Data type is int32 or int64 on GPU and CPU, and uint16 on Ascend.

    Raises:
        TypeError: If `x` is not a Tensor.
        ValueError: If length of shape of `x` is not equal to 4.
        TypeError: If `kernel_size`, `strides`, `pads` or `dilation` is not int or tuple.
        ValueError: If `kernel_size`, `strides` or `dilation` is less than 1.
        ValueError: If `pads` is less than 0.
        ValueError: If `pads` is more than half of `kernel_size`.
        ValueError: If `argmax_type` is not mindspore.int64 or mindspore.int32.
        TypeError: If `ceil_mode` is not bool.
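
    Note:
        As a worked instance of the shape formulas above, matching the example below: with
        :math:`H_{in} = 50`, :math:`W_{in} = 32`, `kernel_size` :math:`(3, 2)`, `strides` :math:`(2, 1)`,
        `pads` 0 and `dilation` :math:`(1, 1)`,
        :math:`H_{out} = \lfloor (50 + 0 - 1 \times (3 - 1) - 1) / 2 + 1 \rfloor = 24` and
        :math:`W_{out} = \lfloor (32 + 0 - 1 \times (2 - 1) - 1) / 1 + 1 \rfloor = 31`.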
9436 9437 Supported Platforms: 9438 ``Ascend`` ``GPU`` ``CPU`` 9439 9440 Examples: 9441 >>> import mindspore 9442 >>> import numpy as np 9443 >>> from mindspore import Tensor, ops 9444 >>> x = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32) 9445 >>> maxpool_arg_v2_op = ops.MaxPoolWithArgmaxV2(kernel_size=(3, 2), strides=(2, 1)) 9446 >>> output_tensor, argmax = maxpool_arg_v2_op(x) 9447 >>> print(output_tensor.shape) 9448 (20, 16, 24, 31) 9449 >>> print(argmax.shape) 9450 (20, 16, 24, 31) 9451 """ 9452 9453 @prim_attr_register 9454 def __init__(self, kernel_size, strides=None, pads=0, dilation=(1, 1), ceil_mode=False, argmax_type=mstype.int64): 9455 """Initialize MaxPoolWithArgmaxV2.""" 9456 self.init_prim_io_names(inputs=["x"], outputs=["output", "argmax"]) 9457 validator.check_value_type("ceil_mode", ceil_mode, bool, self.name) 9458 self.ceil_mode = ceil_mode 9459 validator.check_value_type("argmax_type", argmax_type, [mstype.Type], self.name) 9460 argmax_type_valid_values = (mstype.int32, mstype.int64) 9461 validator.check_type_name("argmax_type", argmax_type, argmax_type_valid_values, self.name) 9462 if argmax_type == mstype.int32: 9463 self.add_prim_attr("argmax_type", 3) 9464 elif argmax_type == mstype.int64: 9465 self.add_prim_attr("argmax_type", 4) 9466 else: 9467 raise ValueError( 9468 f"For '{self.name}', the 'argmax_type' must be mstype.int32 or mstype.int64, but got {argmax_type}.") 9469 self.kernel_size = _check_positive_int_or_tuple("kernel_size", kernel_size, self.name, ret_four=True) 9470 if strides is None: 9471 strides = kernel_size 9472 self.strides = _check_positive_int_or_tuple("strides", strides, self.name, ret_four=True) 9473 self.pads = _check_positive_int_or_tuple("pads", pads, self.name, ret_four=True, strict_positive=False) 9474 self.dilation = _check_positive_int_or_tuple("dilation", dilation, self.name, ret_four=True) 9475 self.add_prim_attr("kernel_size", self.kernel_size) 9476 self.add_prim_attr("strides", self.strides) 9477 self.add_prim_attr("pads", self.pads) 9478 self.add_prim_attr("dilation", self.dilation) 9479 self.add_prim_attr("ceil_mode", self.ceil_mode) 9480 9481 9482class WKV(Primitive): 9483 r""" 9484 The WKV computation is similar to AFT(Zhai et al., 2021), but W is now a channel-wise vector multiplied 9485 by relative position rather than a pairwise matrix in AFT. We also introduce a vector U for separately 9486 attending to the current token in order to compensate for potential degeneration of W. 9487 9488 Inputs: 9489 - **w** (Tensor) - The time_first tensor with data type of float32. 9490 Input tensor of shape :math:`(hidden\_size,)`. 9491 - **u** (Tensor]) - The time_decay tensor with data type of float32. 9492 Input tensor of shape :math:`(hidden\_size,)`. 9493 - **k** (Tensor) - The key tensor with data type of float32. 9494 Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`. 9495 - **v** (Tensor) - The value tensor with data type of float32. 9496 Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`. 9497 - **sp** (Tensor) - The states_p tensor with data type of float32. 9498 Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`. 9499 - **sq** (Tensor) - The states_q tensor with data type of float32. 9500 Input tensor of shape :math:`(batch\_size, hidden\_size)`. 9501 - **sm** (Tensor) - The states_m tensor with data type of float32. 9502 Input tensor of shape :math:`(batch\_size, hidden\_size)`. 

    Outputs:
        Tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.

    Supported Platforms:
        ``Ascend``

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor
        >>> from mindspore.ops.operations import nn_ops
        >>> b = 32
        >>> t = 2
        >>> c = 128
        >>> w = Tensor(np.random.randn(c).astype(np.float32))
        >>> u = Tensor(np.random.randn(c).astype(np.float32))
        >>> k = Tensor(np.random.randn(b, t, c).astype(np.float32))
        >>> v = Tensor(np.random.randn(b, t, c).astype(np.float32))
        >>> sp = Tensor(np.random.randn(b, c).astype(np.float32))
        >>> sq = Tensor(np.random.randn(b, c).astype(np.float32))
        >>> sm = Tensor(np.random.randn(b, c).astype(np.float32))
        >>> wkv = nn_ops.WKV()
        >>> output = wkv(w, u, k, v, sp, sq, sm)
        >>> print(output[0].shape)
        (32, 2, 128)
    """

    @prim_attr_register
    def __init__(self):
        """Initialize WKV."""
        self.init_prim_io_names(inputs=["time_first", "time_decay", "key", "value", "sp", "sq", "sm"],
                                outputs=["output", "out_sp", "out_sq", "out_sm"])


class PromptFlashAttention(Primitive):
    r"""
    The interface for full inference.

    B -- Batch size

    S -- Sequence length

    H -- Hidden size

    Note:
        This is an experimental operator.

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Args:
        num_heads (int): The number of heads.
        scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
            Muls in the calculation. Default: ``1.0`` .
        pre_tokens (int): The number of previous tokens that can be attended to. Default: ``2147483547`` .
        next_tokens (int): The number of subsequent tokens that can be attended to, i.e. the number of data
            blocks in the upper triangle involved in the calculation. The value 0 indicates that the data
            blocks in the upper triangle are not involved in the calculation. Default: ``0`` .
        input_layout (str): The data layout of the input qkv, supporting ``'BSH'`` and ``'BNSD'`` .
            Default: ``'BSH'`` .
        num_key_value_heads (int): Head number of key/value, used in the GQA algorithm.
            The value 0 indicates that key and value have the same head number as `num_heads`. Default: ``0`` .
        sparse_mode (int): The sparse mode. Default: ``0`` .
        inner_precise (int): ``0`` for float16 high precision, ``1`` for high performance. Default: ``1`` .

    Inputs:
        - **query** (Tensor) - The query tensor with data type of float16 or float32.
          Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
        - **key** (Tensor) - The key tensor with data type of float16 or float32.
          Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
        - **value** (Tensor) - The value tensor with data type of float16 or float32.
          Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
        - **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or float32.
          For each element, 0 indicates retention and 1 indicates discard.
          Input tensor of shape :math:`(B, 1, S, S)`.
        - **actual_seq_lengths** (Tensor) - Describes the actual sequence length of each input, with data type
          of int64.
        - **actual_seq_lengths_kv** (Tensor) - Describes the actual sequence length of each key/value input,
          with data type of int64.
        - **pse_shift** (Tensor) - The position encoding tensor with data type of float16 or float32.
9574 - **dep_scale1** (Tensor) 9575 - **quant_scale1** (Tensor) 9576 - **deq_scale2** (Tensor) 9577 - **quant_scale2** (Tensor) 9578 - **quant_offset2** (Tensor) 9579 9580 Outputs: 9581 - **attention_out** (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`. 9582 9583 Supported Platforms: 9584 ``Ascend`` 9585 9586 Examples: 9587 >>> import mindspore.ops.operations.nn_ops as P 9588 >>> from mindspore import Tensor 9589 >>> import numpy as np 9590 >>> B = 1 9591 >>> N = 16 9592 >>> S = 256 9593 >>> D = 16 9594 >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16)) 9595 >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16)) 9596 >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16)) 9597 >>> attn_mask = Tensor(np.ones((B, 1, S, S), dtype=np.float16)) 9598 >>> pfa = P.PromptFlashAttention(N, input_layout='BNSD') 9599 >>> out = pfa(query, key, value, attn_mask, None, None, None, None, None, None, None, None) 9600 >>> print(out.shape) 9601 (1, 16, 256, 16) 9602 """ 9603 9604 @prim_attr_register 9605 def __init__(self, num_heads, scale_value=1.0, pre_tokens=214748647, next_tokens=0, input_layout='BSH', 9606 num_key_value_heads=0, sparse_mode=0, inner_precise=1): 9607 """Initialize PromptFlashAttention.""" 9608 validator.check_value_type('num_heads', num_heads, [int], self.name) 9609 validator.check_value_type('scale_value', scale_value, [float], self.name) 9610 validator.check_value_type('pre_tokens', pre_tokens, [int], self.name) 9611 validator.check_value_type('next_tokens', next_tokens, [int], self.name) 9612 validator.check_value_type('input_layout', input_layout, [str], self.name) 9613 validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name) 9614 validator.check_value_type('sparse_mode', sparse_mode, [int], self.name) 9615 validator.check_value_type('inner_precise', inner_precise, [int], self.name) 9616 self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "actual_seq_lengths", 9617 "actual_seq_lengths_kv", "pse_shift", "deq_scale1", "quant_scale1", 9618 "deq_scale2", "quant_scale2", "quant_offset2"], 9619 outputs=["attention_out"]) 9620 9621 9622class IncreFlashAttention(Primitive): 9623 r""" 9624 The interface for fully inference. 9625 9626 B -- Batch size 9627 9628 S -- Sequence length 9629 9630 H -- Hidden size 9631 9632 .. warning:: 9633 This is an experimental API that is subject to change or deletion. 9634 If there is no input parameter and no default value, None needs to be passed. 9635 9636 Args: 9637 - **num_heads** (int) - The number of heads. 9638 - **input_layout** (str) - the data layout of the input qkv, support `(BSH)` and `(BNSD)`. Default `BSH`. 9639 - **scale_value** (double) - The scale value indicating the scale coefficient, which is used as the scalar of 9640 Muls in the calculation. Default: 1.0. 9641 - **num_key_value_heads** (int) - head numbers of key/value which are used in GQA algorithm. 9642 The value o indicates if the key and value have the same head nums, use numHeads. Default: 0. 9643 - **block_size** (int) - Default: 0. 9644 - **inner_precise** (int) - Default: 1. 9645 9646 Inputs: 9647 - **query** (Tensor) - The query tensor with data type of float16 or bfloat16. 9648 Input tensor of shape :math:`(B, 1, H)` / :math:`(B, N, 1, D)`. 9649 - **key** (TensorList) - The key tensor with data type of float16 or bfloat16. 9650 Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`. 9651 - **value** (TensorList) - The value tensor with data type of float16 or bfloat16. 
          Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
        - **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or bool.
          Input tensor of shape :math:`(B, S)` / :math:`(B, 1, S)` / :math:`(B, 1, 1, S)`.
        - **actual_seq_lengths** (Tensor) - Describes the actual sequence length of each input, with data type
          of int.
        - **pse_shift** (Tensor) - The position encoding tensor with data type of float16 or float32.
        - **dequant_scale1** (Tensor) - Quantization parameter, the tensor with data type of uint64.
        - **quant_scale1** (Tensor) - Quantization parameter, the tensor with data type of float.
        - **dequant_scale2** (Tensor) - Quantization parameter, the tensor with data type of uint64.
        - **quant_scale2** (Tensor) - Quantization parameter, the tensor with data type of float.
        - **quant_offset2** (Tensor) - Quantization parameter, the tensor with data type of float.
        - **antiquant_scale** (Tensor) - Quantization parameter, the tensor with data type of float.
        - **antiquant_offset** (Tensor) - Quantization parameter, the tensor with data type of float.
        - **block_table** (Tensor) - The block table tensor, with data type of float.

    Outputs:
        - **attention_out** (Tensor) - Output tensor of shape :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.

    Supported Platforms:
        ``Ascend``
    """

    @prim_attr_register
    def __init__(self, num_heads, input_layout="BSH", scale_value=1.0, num_key_value_heads=0, block_size=0,
                 inner_precise=1):
        """Initialize IncreFlashAttention."""
        validator.check_value_type('num_heads', num_heads, [int], self.name)
        validator.check_value_type('input_layout', input_layout, [str], self.name)
        validator.check_value_type('scale_value', scale_value, [float], self.name)
        validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name)
        validator.check_value_type('block_size', block_size, [int], self.name)
        validator.check_value_type('inner_precise', inner_precise, [int], self.name)
        self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "actual_seq_lengths", "pse_shift",
                                        "dequant_scale1", "quant_scale1", "dequant_scale2", "quant_scale2",
                                        "quant_offset2", "antiquant_scale", "antiquant_offset", "block_table"],
                                outputs=["attention_out"])


class AllFinite(Primitive):
    r"""
    Check whether all gradients are finite.
    """
    @prim_attr_register
    def __init__(self):
        """Initialize AllFinite."""
        self.init_prim_io_names(inputs=["gradients"],
                                outputs=["is_finite"])