# Copyright 2020-2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Operators for nn."""
from __future__ import absolute_import
from __future__ import division

import math
from functools import partial
from mindspore import log as logger
from mindspore._checkparam import _check_3d_int_or_tuple
from mindspore import context
from mindspore.ops import signature as sig
from mindspore import _checkparam as validator
from mindspore.common import dtype as mstype
from mindspore.common._decorator import deprecated
from mindspore.ops.primitive import Primitive
from mindspore.ops.primitive import PrimitiveWithInfer
from mindspore.ops.primitive import PrimitiveWithCheck
from mindspore.ops.primitive import prim_attr_register
from ..auto_generate import (CeLU, Flatten, LogSoftmax, ReLU, ReLU6, Dense, Tanh,
                             Elu, Sigmoid, Softmax, SoftplusExt, HSwish, HSigmoid, AvgPool, BiasAdd,
                             NLLLoss, OneHot, GeLU, FastGeLU, PReLU, RmsNorm,
                             GridSampler3D, GridSampler2D, LayerNorm, LayerNormExt, HShrink, AdamWeightDecay, Dropout,
                             ApplyRotaryPosEmb, PagedAttention, PagedAttentionMask, ReshapeAndCache,
                             FlashAttentionScore, Embedding, UpsampleNearest1D, UpsampleNearest2D,
                             UpsampleNearest3D, UpsampleTrilinear3D,
                             UpsampleBilinear2D, UpsampleLinear1D,
                             BinaryCrossEntropy, BCEWithLogitsLoss)
from .manually_defined import BatchNorm


def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=False,
                                 ret_four=False, strict_positive=True):
    """
    Checks whether an argument is a positive int or tuple with 2 or 4 (when allow_four is True) positive int elements.
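
    Illustrative behaviour (doctest-style; calling this private helper directly is
    shown for documentation only):

    >>> _check_positive_int_or_tuple('kernel_size', 3, 'Conv2D')
    (3, 3)
    >>> _check_positive_int_or_tuple('stride', 2, 'Conv2D', allow_four=True, ret_four=True)
    (1, 1, 2, 2)
    >>> _check_positive_int_or_tuple('stride', (1, 1, 2, 2), 'Conv2D', allow_four=True)
    (2, 2)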
49    """
50
51    def _raise_message():
52        raise ValueError(f"For '{prim_name}' attr '{arg_name}' must be an positive int number or a tuple of two "
53                         f"{'or four ' if allow_four else ''}positive int numbers, but got {arg_value}")
54
55    def _get_return_value():
56        if isinstance(arg_value, int):
57            ret = (1, 1, arg_value, arg_value) if ret_four else (arg_value, arg_value)
58        elif len(arg_value) == 2:
59            ret = (1, 1, arg_value[0], arg_value[1]) if ret_four else arg_value
60        elif len(arg_value) == 4:
61            if not allow_four:
62                _raise_message()
63            ret = arg_value if ret_four else (arg_value[2], arg_value[3])
64        else:
65            _raise_message()
66        return ret
67
68    validator.check_value_type(arg_name, arg_value, (int, tuple), prim_name)
69    ret_value = _get_return_value()
70    for item in ret_value:
71        if isinstance(item, int) and not isinstance(item, bool):
72            if item > 0:
73                continue
74            if not strict_positive and item == 0:
75                continue
76        _raise_message()
77    return ret_value
78
79
def _check_shape(arg_name, arg_value, prim_name):
    """
    Checks whether all elements of a shape are positive ints.
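
    Illustrative behaviour (doctest-style, for documentation only):

    >>> _check_shape('shape', (2, 3, 4), 'Dense')
    (2, 3, 4)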
83    """
84
85    def _raise_message():
86        raise ValueError(f"For '{prim_name}' attr '{arg_name}' dims elements must be positive int numbers, "
87                         f"but got {arg_value}")
88
89    validator.check_value_type(arg_name, arg_value, (list, tuple), prim_name)
90    for item in arg_value:
91        if isinstance(item, int) and item > 0:
92            continue
93        _raise_message()
94    return arg_value
95
96
def _update_attr_by_format(arg_value, arg_format):
    """
    If the format is NHWC, convert the strides or dilation tuple from NCHW order to NHWC order.
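
    Illustrative behaviour (doctest-style, for documentation only):

    >>> _update_attr_by_format((1, 1, 2, 2), "NHWC")
    (1, 2, 2, 1)
    >>> _update_attr_by_format((1, 1, 2, 2), "NCHW")
    (1, 1, 2, 2)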
100    """
101    ret = arg_value
102    if len(arg_value) == 4 and arg_format == "NHWC":
103        ret = arg_value[1:] + (1,)
104
105    return ret
106
107
class AdaptiveAvgPool3D(Primitive):
    r"""
    AdaptiveAvgPool3D operation.

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Refer to :func:`mindspore.ops.adaptive_avg_pool3d` for more details.

    Args:
        output_size (Union[int, tuple]): Specify the size of output tensor. It
            can be a single int or a tuple of three ints.

    Inputs:
        - **x** (Tensor) - The input of AdaptiveAvgPool3D, which is a 5D or 4D tensor.

    Outputs:
        Tensor, with the same type as the `x`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import nn, Tensor
        >>> from mindspore.ops import AdaptiveAvgPool3D
        >>> class AdaptiveAvgPool3DNet(nn.Cell):
        ...     def __init__(self, output_size):
        ...         super(AdaptiveAvgPool3DNet, self).__init__()
        ...         self.output_size_ = output_size
        ...         self.adaptive_avg_pool_3d = AdaptiveAvgPool3D(self.output_size_)
        ...     def construct(self, x_):
        ...         return self.adaptive_avg_pool_3d(x_)
        ...
        >>> output_size = (1, 1, 1)
        >>> input_x_val = np.zeros((1, 1, 2, 2, 2))
        >>> input_x_val[:, :, 0, :, :] += 1
        >>> input_x = Tensor(input_x_val, mindspore.float32)
        >>> adaptive_avg_pool_3d = AdaptiveAvgPool3DNet(output_size)
        >>> output = adaptive_avg_pool_3d(input_x)
        >>> print(output)
        [[[[[0.5]]]]]
    """

    @prim_attr_register
    def __init__(self, output_size):
        validator.check_value_type("output_size", output_size, [int, tuple], self.name)
        self.output_size = (output_size,) * 3 if isinstance(self.output_size, int) else output_size
        for i, size in enumerate(self.output_size):
            validator.check_value_type(f"output_size[{i}]", size, [int, type(None)], self.name)
            if size is not None:
                validator.check_number(f"output_size[{i}]", size, 0, validator.GE, self.name)

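        # None entries mean "keep the input size in that dimension"; they are
        # encoded as -1 before being passed to the backend.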
        self.output_size = tuple(-1 if val is None else val for val in self.output_size)

        self.add_prim_attr('output_size', self.output_size)
        self.init_prim_io_names(inputs=['x'], outputs=['y'])


class AdaptiveAvgPool2D(Primitive):
    r"""
    AdaptiveAvgPool2D operation.

    Refer to :func:`mindspore.ops.adaptive_avg_pool2d` for more details.

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Args:
        output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,
            or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
            If it is None, it means the output size is the same as the input size.

    Inputs:
        - **input_x** (Tensor) - The input of AdaptiveAvgPool2D, which is a 3D or 4D tensor,
          with float16, float32 or float64 data type.

    Outputs:
        Tensor, with the same type as the `input_x`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> # case 1: output_size=(None, 2)
        >>> input_x = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]), mindspore.float32)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((None, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[1.5 2.5]
          [4.5 5.5]
          [7.5 8.5]]
         [[1.5 2.5]
          [4.5 5.5]
          [7.5 8.5]]
         [[1.5 2.5]
          [4.5 5.5]
          [7.5 8.5]]]
        >>> # case 2: output_size=2
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D(2)
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[3. 4.]
          [6. 7.]]
         [[3. 4.]
          [6. 7.]]
         [[3. 4.]
          [6. 7.]]]
        >>> # case 3: output_size=(1, 2)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((1, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[4.5 5.5]]
         [[4.5 5.5]]
         [[4.5 5.5]]]
    """

    @prim_attr_register
    def __init__(self, output_size):
        """Initialize AdaptiveAvgPool2D."""
        self.init_prim_io_names(inputs=['x'], outputs=['y'])
        validator.check_value_type("output_size", output_size, [int, tuple], self.name)
        if isinstance(output_size, tuple):
            validator.check_int(len(output_size), 2, validator.EQ, 'length of output_size', self.name)
        self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size
        for i, size in enumerate(self.output_size):
            validator.check_value_type(f"output_size[{i}]", size, [int, type(None)], self.name)
            if size is not None:
                validator.check_number(f"output_size[{i}]", size, 0, validator.GE, self.name)

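        # None entries mean "keep the input size in that dimension"; they are
        # encoded as -1 before being passed to the backend.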
        self.output_size = tuple(-1 if val is None else val for val in self.output_size)
        self.add_prim_attr('output_size', self.output_size)


class AdaptiveMaxPool2D(Primitive):
    r"""
    Performs 2D adaptive max pooling on a multi-plane input signal.

    Refer to :func:`mindspore.ops.adaptive_max_pool2d` for more details.

    Args:
        output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,
            or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
            If it is None, it means the output size is the same as the input size.

    Inputs:
        - **input_x** (Tensor) - The input of AdaptiveMaxPool2D, which is a 3D or 4D tensor,
          with float16, float32 or float64 data type.

    Outputs:
        Tensor, with the same type as the `input_x`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> # case 1: output_size=(None, 2)
        >>> input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
        >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((None, 2))
        >>> output = adaptive_max_pool_2d(input_x)
        >>> print(output[0])
        [[[[2. 3.]
           [5. 6.]
           [8. 9.]]
          [[2. 3.]
           [5. 6.]
           [8. 9.]]
          [[2. 3.]
           [5. 6.]
           [8. 9.]]]]
        >>> # case 2: output_size=2
        >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D(2)
        >>> output = adaptive_max_pool_2d(input_x)
        >>> print(output[0])
        [[[[5. 6.]
           [8. 9.]]
          [[5. 6.]
           [8. 9.]]
          [[5. 6.]
           [8. 9.]]]]
        >>> # case 3: output_size=(1, 2)
        >>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((1, 2))
        >>> output = adaptive_max_pool_2d(input_x)
        >>> print(output[0])
        [[[[8. 9.]]
          [[8. 9.]]
          [[8. 9.]]]]
    """

    @prim_attr_register
    def __init__(self, output_size):
        """Initialize AdaptiveMaxPool2D."""
        validator.check_value_type("output_size", output_size, [int, tuple], self.name)
        if isinstance(output_size, tuple):
            validator.check_int(len(output_size), 2, validator.EQ,
                                'length of output_size', self.name)
        self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size
        self.output_size = (-1 if self.output_size[0] is None else self.output_size[0],
                            -1 if self.output_size[1] is None else self.output_size[1])
        for size in self.output_size:
            validator.check_number("output_size", size, -1, validator.GE, None)
        self.add_prim_attr('output_size', self.output_size)


class AdaptiveMaxPool3D(Primitive):
    r"""
    Performs 3D adaptive max pooling on a multi-plane input signal.

    Refer to :func:`mindspore.ops.adaptive_max_pool3d` for more details.

    Inputs:
        - **x** (Tensor) - Tensor, with shape :math:`(C, D, H, W)` or :math:`(N, C, D, H, W)`.
        - **output_size** (Union[int, tuple]) - The specified output size, which is an integer that represents depth,
          height and width, or a tuple of three int numbers that represent depth, height and width respectively.
          The value must be a positive integer. If it is None, the output size and input size of the corresponding
          dimension are the same.

    Outputs:
        - **y** (Tensor) - Tensor, with the same number of dims and data type as the `x`.
        - **argmax** (Tensor) - Tensor, the indices of max value, which has the same shape as the
          `y` and its data type is int32.

    Supported Platforms:
        ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore import nn, ops, Tensor
        >>> class AdaptiveMaxPool3DNet(nn.Cell):
        ...     def __init__(self):
        ...         super(AdaptiveMaxPool3DNet, self).__init__()
        ...         self.adaptive_max_pool_3d = ops.AdaptiveMaxPool3D()
        ...     def construct(self, x_, output_size_):
        ...         return self.adaptive_max_pool_3d(x_, output_size_)
        >>> x = np.arange(0, 36).reshape((1, 3, 3, 4)).astype(np.float32)
        >>> output_size = np.array([1, 1, 2], dtype=np.int32)
        >>> net = AdaptiveMaxPool3DNet()
        >>> output = net(Tensor(x), Tensor(output_size))
        >>> print(output[0].asnumpy())
        [[[[33. 35.]]]]
        >>> print(output[1].asnumpy())
        [[[[33 35]]]]
    """

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['x', 'output_size'], outputs=['y', 'argmax'])


class Softplus(Primitive):
    r"""
    Softplus activation function.

    Softplus is a smooth approximation to the ReLU function.
    It can be used to constrain the output of a machine to always be positive.
    The function is shown as follows:

    .. math::

        \text{output} = \log(1 + \exp(\text{x}))

    Inputs:
        - **input_x** (Tensor) - Tensor of any dimension.
          Supported dtypes:

          - GPU/CPU: float16, float32, float64.
          - Ascend: float16, float32.

    Outputs:
        Tensor, with the same type and shape as the `input_x`.

    Raises:
        TypeError: If `input_x` is not a Tensor.
        TypeError: If the dtype of `input_x` is not float16, float32 or float64.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
        >>> softplus = ops.Softplus()
        >>> output = softplus(input_x)
        >>> print(output)
        [1.3132615 2.126928  3.0485873 4.01815   5.0067153]
    """

    @prim_attr_register
    def __init__(self):
        """Initialize Softplus"""
        self.init_prim_io_names(inputs=['x'], outputs=['output'])


class Softsign(Primitive):
    r"""
    Softsign activation function.

    Refer to :func:`mindspore.ops.softsign` for more details.

    Inputs:
        - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means any number of
          additional dimensions, with float16 or float32 data type.

    Outputs:
        Tensor, with the same type and shape as the `input_x`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> input_x = Tensor(np.array([0, -1, 2, 30, -30]), mindspore.float32)
        >>> softsign = ops.Softsign()
        >>> output = softsign(input_x)
        >>> print(output)
        [ 0.        -0.5         0.6666667  0.9677419 -0.9677419]
    """

    @prim_attr_register
    def __init__(self):
        """Initialize Softsign"""
        self.init_prim_io_names(inputs=['x'], outputs=['output'])


class ReLUV3(Primitive):
    r"""
    Computes ReLUV3 (Rectified Linear Unit activation function) of input tensors element-wise.

    It returns max(x, 0) element-wise. Specifically, the neurons with negative output
    will be suppressed and the active neurons will stay the same.

    .. math::

        ReLUV3(x) = (x)^+ = \max(0, x)

    Inputs:
        - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means any number of
          additional dimensions, data type is
          `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore.html#mindspore.dtype>`_.

    Outputs:
        Tensor of shape :math:`(N, *)`, with the same type and shape as the `input_x`.

    Raises:
        TypeError: If `input_x` is not a Tensor.

    Supported Platforms:
        ``Ascend`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
        >>> relu_v3 = ops.ReLUV3()
        >>> output = relu_v3(input_x)
        >>> print(output)
        [[0. 4. 0.]
         [2. 0. 9.]]
    """

    @prim_attr_register
    def __init__(self):
        """Initialize ReLUV3"""
        self.init_prim_io_names(inputs=['x'], outputs=['output'])


class Mish(PrimitiveWithInfer):
    r"""
    Computes MISH (A Self Regularized Non-Monotonic Neural Activation Function) of input tensors element-wise.

    Refer to :func:`mindspore.ops.mish` for more details.

    Inputs:
        - **x** (Tensor) - The input Tensor.
          Supported dtypes:

          - GPU/CPU: float16, float32, float64.
          - Ascend: float16, float32.

    Outputs:
        Tensor, with the same type and shape as the `x`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
        >>> mish = ops.Mish()
        >>> output = mish(x)
        >>> print(output.shape)
        (2, 3)
        >>> x = Tensor(2.1, mindspore.float32)
        >>> output = mish(x)
        >>> print(output)
        2.050599
    """

    @prim_attr_register
    def __init__(self):
        """Initialize Mish"""
        self.init_prim_io_names(inputs=['x'], outputs=['output'])


class SeLU(Primitive):
    r"""
    Activation function SeLU (Scaled exponential Linear Unit).

    The activation function is defined as:

    .. math::
        E_{i} =
        scale *
        \begin{cases}
        x_{i}, &\text{if } x_{i} \geq 0; \cr
        \text{alpha} * (\exp(x_i) - 1), &\text{otherwise.}
        \end{cases}

    where :math:`alpha` and :math:`scale` are pre-defined constants (:math:`alpha=1.67326324`
    and :math:`scale=1.05070098`).

    See more details in `Self-Normalizing Neural Networks <https://arxiv.org/abs/1706.02515>`_.

    Inputs:
        - **input_x** (Tensor) - Tensor of any dimension.
          The data type is int8, int32, float16, float32, float64 (only CPU, GPU).

    Outputs:
        Tensor, with the same type and shape as the `input_x`.

    Raises:
        TypeError: If dtype of `input_x` is not int8, int32, float16, float32, float64.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
        >>> selu = ops.SeLU()
        >>> output = selu(input_x)
        >>> print(output)
        [[-1.1113307 4.202804 -1.7575096]
         [ 2.101402 -1.7462534 9.456309 ]]
    """

    @prim_attr_register
    def __init__(self):
        """Initialize SeLU"""
        self.init_prim_io_names(inputs=['input_x'], outputs=['output'])


class FusedBatchNorm(Primitive):
    r"""
    The FusedBatchNorm interface is deprecated, please use the BatchNorm interface.
    """

    def __init__(self, mode=0, epsilon=1e-5, momentum=0.1):
        raise TypeError("The FusedBatchNorm interface is deprecated, please use the BatchNorm interface.")


class FusedBatchNormEx(PrimitiveWithCheck):
    r"""
    The FusedBatchNormEx interface is deprecated, please use the BatchNorm interface.
    """

    def __init__(self, mode=0, epsilon=1e-5, momentum=0.1, data_format="NCHW"):
        raise TypeError("The FusedBatchNormEx interface is deprecated, please use the BatchNorm interface.")


class InstanceNorm(PrimitiveWithInfer):
    r"""
    Instance Normalization over a 4D input.

    This operator applies Instance Normalization over a 4D input (a mini-batch of 2D inputs with
    additional channel dimension) as described in the paper `Instance Normalization: The Missing Ingredient for
    Fast Stylization <https://arxiv.org/abs/1607.08022>`_. It rescales and recenters the feature using a mini-batch
    of data and the learned parameters which can be described in the following formula.

    .. math::

        y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta

    where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon.

    Args:
        epsilon (float): A small value added for numerical stability. Default: ``1e-5`` .
        momentum (float): The hyper parameter to compute moving average for running_mean and running_var
            (e.g. :math:`new\_running\_mean = momentum * running\_mean + (1 - momentum) * current\_mean`).
            Momentum value must be in [0, 1]. Default: ``0.1`` .

    Inputs:
        - **input_x** (Tensor) - The input of InstanceNorm, Tensor of shape :math:`(N, C, H, W)`,
          data type: float16 or float32.
        - **gamma** (Parameter) - Scale, Tensor of shape :math:`(C,)`,
          data type: float32.
        - **beta** (Parameter) - Bias, Tensor of shape :math:`(C,)`,
          data type: float32.
        - **mean** (Parameter) - Mean value, Tensor of shape :math:`(C,)`, data type: float32.
        - **variance** (Parameter) - Variance value, Tensor of shape :math:`(C,)`, data type: float32.

    Outputs:
        Tuple of 3 Tensors, the normalized input and the updated parameters.

        - **output_x** (Tensor) - The output of InstanceNorm, same type and shape as the `input_x`.
        - **updated_moving_mean** (Tensor) - Updated mean value, Tensor of shape :math:`(NC,)`, data type: float32.
        - **updated_moving_variance** (Tensor) - Updated variance value, Tensor of shape :math:`(NC,)`,
          data type: float32.

    Supported Platforms:
        ``GPU``

    Raises:
        TypeError: If `epsilon` or `momentum` is not a float.
        TypeError: If dtype of `input_x` is neither float16 nor float32.
        TypeError: If dtype of `gamma`, `beta` or `mean` is not float32.
        ValueError: If `epsilon` is not in the range of [0, 1).
        ValueError: If `momentum` is not in the range of [0, 1].

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import nn, ops, Parameter, Tensor
        >>> class InstanceNormNet(nn.Cell):
        ...     def __init__(self):
        ...         super(InstanceNormNet, self).__init__()
        ...         self.instance_norm = ops.InstanceNorm()
        ...         self.gamma = Parameter(Tensor(np.ones([64]), mindspore.float32), name="gamma")
        ...         self.beta = Parameter(Tensor(np.ones([64]), mindspore.float32), name="beta")
        ...         self.mean = Parameter(Tensor(np.ones([64]), mindspore.float32), name="mean")
        ...         self.variance = Parameter(Tensor(np.ones([64]), mindspore.float32), name="variance")
        ...
        ...     def construct(self, input_x):
        ...         out = self.instance_norm(input_x, self.gamma, self.beta, self.mean, self.variance)
        ...         return out
        ...
        >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32)
        >>> net = InstanceNormNet()
        >>> output = net(input_x)
        >>> result = output[0].shape
        >>> print(result)
        (128, 64, 32, 64)
    """
    __mindspore_signature__ = (
        sig.make_sig('input_x', dtype=sig.sig_dtype.T2),
        sig.make_sig('gamma', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('beta', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('mean', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
        sig.make_sig('variance', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
    )

    @prim_attr_register
    def __init__(self, epsilon=1e-5, momentum=0.1):
        """Initialize InstanceNorm."""
        self.init_prim_io_names(inputs=['x', 'gamma', 'beta', 'mean', 'variance'],
                                outputs=['y', 'save_mean', 'save_variance'])
        self.epsilon = validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', self.name)
        self.momentum = validator.check_float_range(momentum, 0, 1, validator.INC_BOTH, 'momentum', self.name)
        self._update_parameter = True
        self.add_prim_attr('side_effect_mem', True)


class InstanceNormV2(Primitive):
    r"""
    Instance Normalization over a 4D or 5D input.

    This operator applies Instance Normalization over a 4D or 5D input (a mini-batch of 2D inputs with
    additional channel dimension) as described in the paper `Instance Normalization: The Missing Ingredient for
    Fast Stylization <https://arxiv.org/abs/1607.08022>`_. It rescales and recenters the feature using a mini-batch
    of data and the learned parameters which can be described in the following formula.

    .. math::

        y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta

    where :math:`\gamma` is scale(gamma), :math:`\beta` is bias(beta), :math:`\epsilon` is epsilon.

    Note:
        The format of the input `x` supports ``NCHW`` and ``NC1HWC0`` on the ``CPU`` and ``Ascend`` platforms.
        When attr `is_training` is `False`, this module does not track the running mean and variance.
        The output `batch_mean` and `batch_variance` will be all zeros.

    Args:
        is_training(bool): An optional boolean value. Default: ``True``.
            When set to ``True``, this module tracks the running mean and variance.
            When set to ``False``, this module does not track such statistics and always uses batch
            statistics in both training and eval modes.
        momentum (float): The hyper parameter to compute moving average for running_mean and running_var
            (e.g. :math:`new\_running\_mean = momentum * running\_mean + (1 - momentum) * current\_mean`).
            Momentum value must be in [0, 1]. Default: ``0.1`` .
        epsilon (float): A small value added to the denominator for numerical stability.
            Epsilon value must be in [0, 1). Default: ``1e-5`` .

    Inputs:
        - **x** (Tensor) - The input of InstanceNormV2, Tensor of shape :math:`(N, C, H, W)`
          or :math:`(N, C1, H, W, C0)`, data type: float16 or float32.
        - **gamma** (Tensor) - Scale, Shape depends on the shape of input `x`, data type: float32.
          If `x` shape is :math:`(N, C, H, W)`, shape of `gamma` is :math:`(N, C, 1, 1)`.
          If `x` shape is :math:`(N, C1, H, W, C0)`, shape of `gamma` is :math:`(N, C1, 1, 1, C0)`.
        - **beta** (Tensor) - Bias, has the same shape and data type as `gamma`.
        - **mean** (Tensor) - Mean value, has the same shape and data type as `gamma`.
        - **variance** (Tensor) - Variance value, has the same shape and data type as `gamma`.

    Outputs:
        Tuple of 3 Tensors, the normalized input, the mean and variance of batch input.

        - **y** (Tensor) - The output of InstanceNormV2, same type and shape as the `x`.
        - **batch_mean** (Tensor) - The mean value of batch input, same type and shape as the input `mean`.
        - **batch_variance** (Tensor) - The variance value of batch input, same type and shape as the input `variance`.

    Supported Platforms:
        ``Ascend`` ``CPU``

    Raises:
        TypeError: If either item in the inputs is not Tensor.
        TypeError: If data type of `x` is neither float16 nor float32.
        TypeError: If data type of `gamma` is not a Tensor of float32.
        TypeError: If data type of `beta` is not a Tensor of float32.
        TypeError: If data type of `mean` is not a Tensor of float32.
        TypeError: If data type of `variance` is not a Tensor of float32.
        TypeError: If data type of attr `is_training` is not bool.
        TypeError: If data type of attr `momentum` is not float.
        TypeError: If data type of attr `epsilon` is not float.
        ValueError: If :math:`H * W <= 1` in input `x`.
        ValueError: If the shape of either item in the inputs is neither 4D nor 5D.
        ValueError: If `epsilon` is not in the range of [0, 1).
        ValueError: If `momentum` is not in the range of [0, 1].

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor
        >>> from mindspore.common import dtype as mstype
        >>> from mindspore.ops import operations as P
        >>> x = Tensor(input_data=np.random.randn(128, 48, 32, 64, 12), dtype=mstype.float32)
        >>> gamma = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
        >>> beta = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
        >>> mean = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
        >>> var = Tensor(input_data=np.random.randn(128, 48, 1, 1, 12), dtype=mstype.float32)
        >>> instance_norm_v2 = P.InstanceNormV2()
        >>> output = instance_norm_v2(x, gamma, beta, mean, var)
        >>> y_shape = output[0].shape
        >>> print(y_shape)
        (128, 48, 32, 64, 12)
        >>> batch_mean_shape = output[1].shape
        >>> print(batch_mean_shape)
        (128, 48, 1, 1, 12)
        >>> batch_var_shape = output[2].shape
        >>> print(batch_var_shape)
        (128, 48, 1, 1, 12)
    """
    __mindspore_signature__ = (
        sig.make_sig('x', dtype=sig.sig_dtype.T1),
        sig.make_sig('gamma', dtype=sig.sig_dtype.T),
        sig.make_sig('beta', dtype=sig.sig_dtype.T),
        sig.make_sig('mean', dtype=sig.sig_dtype.T),
        sig.make_sig('variance', dtype=sig.sig_dtype.T),
    )

    @prim_attr_register
    def __init__(self, is_training=True, momentum=0.1, epsilon=1e-5):
        """Initialize InstanceNormV2."""
        self.init_prim_io_names(inputs=['x', 'gamma', 'beta', 'mean', 'variance'],
                                outputs=['y', 'batch_mean', 'batch_variance'])
        validator.check_is_float(epsilon, 'epsilon', self.name)
        validator.check_is_float(momentum, 'momentum', self.name)
        validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', self.name)
        validator.check_float_range(momentum, 0, 1, validator.INC_BOTH, 'momentum', self.name)
        validator.check_bool(is_training, "is_training", self.name)


class Conv2D(Primitive):
    r"""
    2D convolution layer.

    Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
    where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is feature height, :math:`W` is feature width.

    The output is calculated based on formula:

    .. math::

        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})

    where :math:`bias` is the output channel bias, :math:`ccor` is
    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
    :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.

    Here are the indices' meanings:

    - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
      where :math:`N` is the batch size of the input.

    - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
      where :math:`C_{out}` is the number of
      output channels, which is also equal to the number of kernels.

    - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
      where :math:`C_{in}` is the number of
      input channels, which is also equal to the number of channels in the convolutional kernels.

    Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th
    output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional
    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
    channel in the :math:`i`-th batch of the input feature map.

    The shape of the convolutional kernel is given by :math:`(\text{kernel_size[0]},\text{kernel_size[1]})`,
    where :math:`\text{kernel_size[0]}`
    and :math:`\text{kernel_size[1]}` are the height and width of the kernel, respectively.
    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.

    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

    Note:
        On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
        That is, when `group>1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied.

    Args:
        out_channel (int): Specifies output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel.
            It can be a single int or a tuple of 2 integers. A single int means the value is for both the height
            and the width. A tuple of 2 ints means the first value is for the height and the other is for the width.
        mode (int, optional): Modes for different convolutions. The value is currently not used. Default: ``1`` .
        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .

            - ``"same"``: Pad the input around its edges so that the shape of input and output
              are the same when `stride` is set to ``1``.
              The amount of padding is calculated by the operator internally. If the amount is even, it is
              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
              If this mode is set, `pad` must be 0.
            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
              possible height and width. Extra pixels that could not complete a full stride will
              be discarded. If this mode is set, `pad` must be 0.
            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
              in the height and width directions is determined by the `pad` parameter.
              If this mode is set, `pad` must be greater than or equal to 0.

        pad (Union(int, tuple[int]), optional): Specifies the amount of padding to apply on input
            when `pad_mode` is set to ``"pad"``. It can be a single int or a tuple of 4 ints.
            If `pad` is one integer, the paddings of top, bottom, left and right are the same, equal to `pad`.
            If `pad` is a tuple with four integers, the paddings of top, bottom, left and right will be equal to pad[0],
            pad[1], pad[2], and pad[3] accordingly. Default: ``0`` .
        stride (Union(int, tuple[int]), optional): Specifies the stride of the convolution kernel's movement.
            It can be a single int or a tuple of two or four ints. A single int means the stride is the same in
            both the height and width directions. A tuple of two ints indicates the strides in the height and
            width directions, respectively. For a tuple of four ints, the two ints corresponding to the (N, C)
            dimensions must be 1, and the other two, corresponding to the (H, W) dimensions, are the step sizes
            in the height and width directions respectively. Default: ``1`` .
        dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution.
            It can be a single int or a tuple of 2 or 4 integers. A single int means the dilation size is the same
            in both the height and width directions. A tuple of two ints represents the dilation size in
            the height and width directions, respectively. For a tuple of four ints, the two ints corresponding
            to the (N, C) dimensions must be 1, and the other two, corresponding to the (H, W) dimensions, are the
            dilation sizes in the height and width directions respectively.
            Assuming :math:`dilation=(d0, d1)`, the convolutional kernel samples the input with a
            spacing of :math:`d0-1` elements in the height direction and :math:`d1-1` elements in the width direction.
            The values in the height and width dimensions are in the ranges [1, H] and [1, W], respectively.
            Default: ``1`` .
        group (int, optional): Specifies the number of groups dividing `x`'s input channel when applying
            group convolution. Default: ``1`` .
        data_format (str, optional): The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` .
            Default: ``"NCHW"``. (NHWC is only supported in GPU now.)

    Inputs:
        - **x** (Tensor) - Input tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` or
          :math:`(N, H_{in}, W_{in}, C_{in})` depending on `data_format` .
        - **weight** (Tensor) - The convolutional kernel value, it should have shape
          :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})` .

    Outputs:
        Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`
        or :math:`(N, H_{out}, W_{out}, C_{out})`.
        To see how different pad modes affect the output shape, please refer to
        :class:`mindspore.nn.Conv2d` for more details.

    Raises:
        TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
        TypeError: If `out_channel` or `group` is not an int.
        ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `pad_mode` is not one of ``'same'``, ``'valid'`` or ``'pad'``.
        ValueError: If `pad` is a tuple whose length is not equal to 4.
        ValueError: If `pad_mode` is not equal to ``'pad'`` and `pad` is not equal to ``(0, 0, 0, 0)``.
        ValueError: If `data_format` is neither ``'NHWC'`` nor ``'NCHW'`` .

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> # case 1: All parameters use default values.
        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3)
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 30, 30)
        >>> # case 2: pad_mode="pad", other parameters being default.
        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, pad_mode="pad", pad=(4, 10, 4, 10))
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 44, 44)
        >>> # case 3: stride=(2, 4), other parameters being default.
        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, stride=(2, 4))
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 15, 8)
        >>> # case 4: dilation=2, other parameters being default.
        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, dilation=2)
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 28, 28)
        >>> # case 5: group=2, other parameters being default.
        >>> x = Tensor(np.ones([10, 64, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, group=2)
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 30, 30)
        >>> # case 6: All parameters are specified.
        >>> x = Tensor(np.ones([10, 64, 32, 32]), mindspore.float32)
        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, pad_mode="pad",
        ...                     pad=(4, 10, 4, 10), stride=(2, 4), dilation=2, group=2)
        >>> output = conv2d(x, weight)
        >>> print(output.shape)
        (10, 32, 21, 11)
    """

    @prim_attr_register
    def __init__(self,
                 out_channel,
                 kernel_size,
                 mode=1,
                 pad_mode="valid",
                 pad=0,
                 stride=1,
                 dilation=1,
                 group=1,
                 data_format="NCHW"):
        """Initialize Conv2D"""
        self.init_prim_io_names(inputs=['x', 'w'], outputs=['output'])
        self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name)
        self.stride = _check_positive_int_or_tuple('stride', stride, self.name, allow_four=True, ret_four=True)
        self.add_prim_attr('stride', self.stride)
        self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name, allow_four=True, ret_four=True)
        self.add_prim_attr('dilation', self.dilation)
        validator.check_value_type('pad', pad, (int, tuple), self.name)
        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
        if isinstance(pad, int):
            pad = (pad,) * 4
        else:
            validator.check_equal_int(len(pad), 4, 'pad size', self.name)
        self.pad_mode = validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.name)

        if pad_mode != 'pad' and pad != (0, 0, 0, 0):
            raise ValueError(f"For '{self.name}', the 'pad' must be zero when 'pad_mode' is not 'pad', "
                             f"but got 'pad': {self.pad} and 'pad_mode': {self.pad_mode}.")
        self.add_prim_attr("pad", pad)
        self.padding = pad
        if self.pad_mode == 'pad':
            for item in pad:
                validator.check_non_negative_int(item, 'pad item', self.name)

        self.mode = validator.check_equal_int(mode, 1, 'mode', self.name)
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
                             f"but got the 'data_format' is {self.format} "
                             f"and platform is {context.get_context('device_target')}.")
        self.add_prim_attr('data_format', self.format)
        self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
        self.group = validator.check_positive_int(group, 'group', self.name)
        self.add_prim_attr('groups', self.group)


class DataFormatVecPermute(Primitive):
    r"""
    Converts the input tensor from the `src_format` to the `dst_format` by permuting its dimensions.

    Args:
        src_format (str, optional): the source data format, which can be ``'NHWC'`` or ``'NCHW'`` .
          Default: ``'NHWC'`` .
        dst_format (str, optional): the target data format, which can be ``'NHWC'`` or ``'NCHW'`` .
          Default: ``'NCHW'`` .

    Inputs:
        - **input_x** (Tensor) - A Tensor of shape :math:`(4, )` or :math:`(4, 2)` in source data format.
          Supports int32 and int64 datatype.

    Outputs:
        Tensor, has the same data type and shape as the `input_x`.

    Raises:
        TypeError: If `input_x` is not a Tensor.
        TypeError: If dtype of `input_x` is neither int32 nor int64.
        ValueError: If `src_format` or `dst_format` is not a str in ['NHWC', 'NCHW'].
        ValueError: If `input_x` shape is not :math:`(4, )` or :math:`(4, 2)`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore import nn, ops, Tensor
        >>> class Net(nn.Cell):
        ...     def __init__(self, src_format="NHWC", dst_format="NCHW"):
        ...         super().__init__()
        ...         self.op = ops.DataFormatVecPermute(src_format, dst_format)
        ...     def construct(self, x):
        ...         return self.op(x)
        ...
        >>> net = Net()
        >>> x = Tensor(np.array([1, 2, 3, 4]).astype(np.int32))
        >>> output = net(x)
        >>> print(output)
        [1 4 2 3]
    """

    @prim_attr_register
    def __init__(self, src_format='NHWC', dst_format='NCHW'):
        """Initialize DataFormatVecPermute."""
        valid_values = ['NHWC', 'NCHW']
        self.src_format = validator.check_string(src_format, valid_values, "src_format", self.name)
        self.dst_format = validator.check_string(dst_format, valid_values, "dst_format", self.name)
        self.init_prim_io_names(inputs=['input_x'], outputs=['output'])


class DepthwiseConv2dNative(PrimitiveWithInfer):
    r"""
    DepthwiseConv2dNative will be deprecated in the future. Please use :class:`mindspore.nn.Conv2d` instead.

    Supported Platforms:
        Deprecated
    """

    @prim_attr_register
    def __init__(self,
                 channel_multiplier,
                 kernel_size,
                 mode=3,
                 pad_mode="valid",
                 pad=0,
                 stride=1,
                 dilation=1,
                 group=1):
        """Initialize DepthwiseConv2dNative"""
        logger.warning("WARN_DEPRECATED: The usage of DepthwiseConv2dNative is deprecated."
                       " Please use nn.Conv2d.")
        self.init_prim_io_names(inputs=['x', 'w'], outputs=['output'])
        self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name)
        self.stride = _check_positive_int_or_tuple('stride', stride, self.name)
        if self.stride[0] != self.stride[1]:
            raise ValueError("The height and width of 'stride' must be equal, "
                             f"but got height: {self.stride[0]}, width: {self.stride[1]}")
        self.add_prim_attr('stride', (1, 1, self.stride[0], self.stride[1]))

        self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name)
        if self.dilation[0] != self.dilation[1]:
            raise ValueError("The height and width of 'dilation' must be equal, "
                             f"but got height: {self.dilation[0]}, width: {self.dilation[1]}")
        self.add_prim_attr('dilation', (1, 1, self.dilation[0], self.dilation[1]))
        validator.check_value_type('pad', pad, (int, tuple), self.name)
        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
        if isinstance(pad, int):
            pad = (pad,) * 4
        else:
            validator.check_equal_int(len(pad), 4, 'pad size', self.name)
        self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name)
        if pad_mode != 'pad' and pad != (0, 0, 0, 0):
            raise ValueError(f"For '{self.name}', the 'pad' must be zero or (0, 0, 0, 0) when 'pad_mode' "
                             f"is not \"pad\", but got 'pad' is {self.pad} and 'pad_mode' is {pad_mode}.")
        self.add_prim_attr("pad", pad)
        self.padding = pad
        if self.pad_mode == 'pad':
            for item in pad:
                validator.check_non_negative_int(item, 'pad item', self.name)
        self.mode = validator.check_equal_int(mode, 3, "mode", self.name)
        self.add_prim_attr('data_format', "NCHW")
        self.channel_multiplier = validator.check_positive_int(channel_multiplier, "channel_multiplier", self.name)
        self.group = validator.check_positive_int(group, "group", self.name)
        self.add_prim_attr('offset_a', 0)

    def infer_shape(self, x_shape, w_shape, b_shape=None):
        validator.check_equal_int(len(w_shape), 4, "weight rank", self.name)
        validator.check_equal_int(len(x_shape), 4, "x rank", self.name)
        validator.check("x_shape[1]", x_shape[1], "w_shape[1]", w_shape[1], validator.EQ, self.name)
        validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]), validator.EQ, self.name)

        kernel_size_n, _, kernel_size_h, kernel_size_w = w_shape
        _, _, stride_h, stride_w = self.stride
        _, _, dilation_h, dilation_w = self.dilation
        if kernel_size_n != 1:
            raise ValueError(f"For '{self.name}', the batch of 'weight' must be 1, but got {kernel_size_n}")
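        # Standard convolution output-size arithmetic, summarising the branches below:
        #   valid: out = ceil((in - dilation * (kernel - 1)) / stride)
        #   same:  out = ceil(in / stride); the padding needed to achieve this is split
        #          between both sides, with any odd pixel going to the bottom/right
        #   pad:   out = floor((in + pad_before + pad_after
        #                       - dilation * (kernel - 1) - 1) / stride + 1)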
        if self.pad_mode == "valid":
            h_out = math.ceil((x_shape[2] - dilation_h * (kernel_size_h - 1)) / stride_h)
            w_out = math.ceil((x_shape[3] - dilation_w * (kernel_size_w - 1)) / stride_w)
            pad_top, pad_bottom, pad_left, pad_right = 0, 0, 0, 0
        elif self.pad_mode == "same":
            h_out = math.ceil(x_shape[2] / stride_h)
            w_out = math.ceil(x_shape[3] / stride_w)

            pad_needed_h = max(0, (h_out - 1) * stride_h + dilation_h * (kernel_size_h - 1) + 1 - x_shape[2])
            pad_top = math.floor(pad_needed_h / 2)
            pad_bottom = pad_needed_h - pad_top

            pad_needed_w = max(0, (w_out - 1) * stride_w + dilation_w * (kernel_size_w - 1) + 1 - x_shape[3])
            pad_left = math.floor(pad_needed_w / 2)
            pad_right = pad_needed_w - pad_left
        elif self.pad_mode == 'pad':
            pad_top, pad_bottom, pad_left, pad_right = self.padding

            h_out = 1 + (x_shape[2] + pad_top + pad_bottom - kernel_size_h - (kernel_size_h - 1) * (dilation_h - 1)) \
                    / stride_h
            w_out = 1 + (x_shape[3] + pad_left + pad_right - kernel_size_w - (kernel_size_w - 1) * (dilation_w - 1)) \
                    / stride_w
            h_out = math.floor(h_out)
            w_out = math.floor(w_out)

        self.pad_list = (pad_top, pad_bottom, pad_left, pad_right)
        self.add_prim_attr('pad_list', self.pad_list)

        out_channel = self.channel_multiplier * x_shape[1]
        out_shape = [x_shape[0], out_channel, h_out, w_out]
        return out_shape

    def infer_dtype(self, x_dtype, w_dtype, b_dtype=None):
        args = {'x': x_dtype, 'w': w_dtype}
        validator.check_tensors_dtypes_same_and_valid(args, mstype.number_type, self.name)
        if x_dtype.element_type() == mstype.int8:
            return mstype.TensorType(mstype.int32)
        return x_dtype


1158class _Pool(PrimitiveWithInfer):
1159    r"""
1160    Performs max/avg pooling operation.
1161
1162    Args:
1163        kernel_size (Union[int, tuple[int]]): The size of the kernel, that must be a tuple
1164           of two `int` for height and width. Default: ``1`` .
1165        strides (Union[int, tuple[int]]): The stride of the window, that must be
1166            a tuple of two `int` for height and width. Default: ``1`` .
1167        pad_mode (str): The optional value for pad mode, is ``"same"`` or ``"valid"`` .
1168            Default: ``"valid"`` .
1169        data_format (str): The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` .
1170            Default: ``"NCHW"`` .
1171    """
1172
1173    @prim_attr_register
1174    def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
1175        """Initialize _Pool."""
1176        self.init_prim_io_names(inputs=['x'], outputs=['output'])
1177        validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name)
1178        validator.check_value_type('strides', strides, [int, tuple], self.name)
1179        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
1180        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.name)
1181        self.add_prim_attr("pad_mode", self.pad_mode)
1182        self.is_maxpoolwithargmax = (self.name == "MaxPoolWithArgmax")
1183        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
1184        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
1185            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
1186                             f"but got the 'data_format' is {self.format} and "
1187                             f"the platform is {context.get_context('device_target')}.")
1188        if not self.is_maxpoolwithargmax:
1189            self.add_prim_attr('data_format', self.format)
1190
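        # kernel_size and strides are normalized to 4-tuples below: (1, 1, h, w) in general,
        # or (1, h, w, 1) when this primitive is MaxPoolWithArgmax.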
1191        self.kernel_size = _check_positive_int_or_tuple(
1192            "kernel_size", kernel_size, self.name, allow_four=False, ret_four=True)
1193        if self.is_maxpoolwithargmax:
1194            self.kernel_size = (1, self.kernel_size[-2], self.kernel_size[-1], 1)
1195        self.add_prim_attr("kernel_size", self.kernel_size)
1196
1197        self.strides = _check_positive_int_or_tuple("strides", strides, self.name, allow_four=False, ret_four=True)
1198        if self.is_maxpoolwithargmax:
1199            self.strides = (1, self.strides[-2], self.strides[-1], 1)
1200        self.add_prim_attr("strides", self.strides)
1201
1202    def infer_shape(self, x_shape):
1203        x_shape_norm = x_shape if self.format == "NCHW" else [x_shape[0], x_shape[3], x_shape[1], x_shape[2]]
1204        validator.check_equal_int(len(x_shape_norm), 4, "x rank", self.name)
1205        batch, channel, input_h, input_w = x_shape_norm
1206        if self.is_maxpoolwithargmax:
1207            _, kernel_h, kernel_w, _ = self.kernel_size
1208            _, stride_h, stride_w, _ = self.strides
1209        else:
1210            _, _, kernel_h, kernel_w = self.kernel_size
1211            _, _, stride_h, stride_w = self.strides
1212
1213        if self.pad_mode == "VALID":
1214            if input_h == -1:
1215                out_h = -1
1216            else:
1217                out_h = math.ceil((input_h - (kernel_h - 1)) / stride_h)
1218            if input_w == -1:
1219                out_w = -1
1220            else:
1221                out_w = math.ceil((input_w - (kernel_w - 1)) / stride_w)
1222        elif self.pad_mode == "SAME":
1223            if input_h == -1:
1224                out_h = -1
1225            else:
1226                out_h = math.ceil(input_h / stride_h)
1227            if input_w == -1:
1228                out_w = -1
1229            else:
1230                out_w = math.ceil(input_w / stride_w)
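        # Illustrative example (assumed values): for input_h = 7, kernel_h = 3 and stride_h = 2,
        # "VALID" gives out_h = ceil((7 - 2) / 2) = 3 while "SAME" gives out_h = ceil(7 / 2) = 4;
        # a dimension of -1 (dynamic shape) is propagated to the output unchanged.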
1231        out_shape = [batch, channel, out_h, out_w] if self.format == "NCHW" else [batch, out_h, out_w, channel]
1232
1233        is_dynamic_shape = False
1234        for in_shape_val in x_shape_norm:
1235            if in_shape_val == -1:
1236                is_dynamic_shape = True
1237
1238        for out_shape_val in out_shape:
1239            if out_shape_val <= 0 and not is_dynamic_shape:
1240                raise ValueError(f"For '{self.name}', each element of the output shape must be larger than 0, "
1241                                 f"but got output shape: {out_shape}. The input shape: {x_shape}, "
1242                                 f"kernel size: {self.kernel_size}, strides: {self.strides}. "
1243                                 f"Please check the official API documents for "
1244                                 f"more information about the output.")
1245        return out_shape
1246
1247    def infer_dtype(self, x_dtype):
1248        validator.check_subclass("input", x_dtype, mstype.tensor_type, self.name)
1249        return x_dtype
1250
1251
1252class MaxPool(_Pool):
1253    r"""
1254    Max pooling operation.
1255
1256    Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes.
1257
1258    Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs
1259    regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
1260    :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows:
1261
1262    .. math::
1263        \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
1264        \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
1265
1266    Args:
1267        kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
1268            is an int number that represents height and width of the kernel, or a tuple
1269            of two int numbers that represent height and width respectively. Default: ``1`` .
1270        strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
1271            not only the height of movement but also the width of movement, or a tuple of two int numbers that
1272            represent height and width of movement respectively. Default: ``1`` .
1273        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
1274            ``'same'`` or ``'valid'`` . Default: ``'valid'`` .
1275
1276            - ``'same'``: Pad the input around its edges so that the shape of input and output
1277              are the same when `stride` is set to ``1``.
1278              The amount of padding is calculated by the operator internally. If the amount is even, it is
1279              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
1280            - ``'valid'``: No padding is applied to the input, and the output returns the maximum
1281              possible height and width. Extra pixels that could not complete a full stride will
1282              be discarded.
1283
1284        data_format (str) : The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` .
1285            Default: ``'NCHW'`` .
1286
1287    Inputs:
1288        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
1289          Supported dtypes:
1290
1291          - CPU: float16, float32, float64.
1292          - GPU/Ascend: float16, float32.
1293
1294    Outputs:
1295        Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
1296
1297    Raises:
1298        TypeError: If `kernel_size` or `strides` is neither int nor tuple.
1299        ValueError: If `pad_mode` is neither ``'valid'`` nor ``'same'`` (case insensitive).
1300        ValueError: If `data_format` is neither ``'NCHW'`` nor ``'NHWC'``.
1301        ValueError: If `kernel_size` or `strides` is less than 1.
1302        ValueError: If the length of the shape of `x` is not equal to 4.
1303
1304    Supported Platforms:
1305        ``Ascend`` ``GPU`` ``CPU``
1306
1307    Examples:
1308        >>> import mindspore
1309        >>> import numpy as np
1310        >>> from mindspore import Tensor, ops
1311        >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
1312        >>> maxpool_op = ops.MaxPool(pad_mode="VALID", kernel_size=2, strides=1)
1313        >>> output = maxpool_op(x)
1314        >>> print(output)
1315        [[[[ 5.  6.  7.]
1316           [ 9. 10. 11.]]
1317          [[17. 18. 19.]
1318           [21. 22. 23.]]
1319          [[29. 30. 31.]
1320           [33. 34. 35.]]]]
1321    """
1322
1323    @prim_attr_register
1324    def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
1325        """Initialize MaxPool."""
1326        super(MaxPool, self).__init__(kernel_size, strides, pad_mode, data_format)
1327
1328
1329class MaxPoolV1(Primitive):
1330    r"""
1331    Maxpooling operation.
1332
1333    Applies a 2D maxpooling over an input Tensor which can be regarded as a composition of 2D planes.
1334
1335    Typically, the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPoolV1
1336    outputs regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
1337    :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_h, s_w)`, the operation is as follows.
1338
1339    .. math::
1340        \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
1341        \text{input}(N_i, C_j, s_h \times h + m, s_w \times w + n)
1342
1343    Args:
1344        kernel_size (Union[int, tuple[int]]): The size of kernel used to take the max value,
1345            is an integer that represents height and width of the kernel, or a tuple
1346            of two integers that represent height and width respectively. Default: ``1`` .
1347        strides (Union[int, tuple[int]]): The distance of kernel moving, an integer that represents
1348            the height and width of movement are both strides, or a tuple of two integers that
1349            represent height and width of movement, respectively. Default: ``1`` .
1350        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
1351            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .
1352
1353            - ``"same"``: Pad the input around its edges so that the shape of input and output
1354              are the same when `stride` is set to ``1``.
1355              The amount of padding is calculated by the operator internally. If the amount is even, it is
1356              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
1357            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
1358              possible height and width. Extra pixels that could not complete a full stride will
1359              be discarded.
1360
1361        data_format (str) : The optional value for data format, is ``'NCHW'`` or ``'NHWC'`` .
1362            Default: ``'NCHW'`` .
1363
1364    Inputs:
1365        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
1366
1367    Outputs:
1368        Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
1369
1370    Raises:
1371        TypeError: If `kernel_size` or `strides` is neither int nor tuple.
1372        ValueError: If `pad_mode` is neither 'valid' nor 'same' (case insensitive).
1373        ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'.
1374        ValueError: If `kernel_size` or `strides` is less than 1.
1375        ValueError: If the length of the shape of `x` is not equal to 4.
1376
1377    Supported Platforms:
1378        ``Ascend``
1379
1380    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
1381        >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
1382        >>> maxpoolv1_op = ops.MaxPoolV1(pad_mode="VALID", kernel_size=2, strides=1)
1383        >>> output_ = maxpoolv1_op(x)
1384        >>> print(output_)
1385        [[[[ 5.  6.  7.]
1386           [ 9. 10. 11.]]
1387          [[17. 18. 19.]
1388           [21. 22. 23.]]
1389          [[29. 30. 31.]
1390           [33. 34. 35.]]]]
1391    """
1392
1393    @prim_attr_register
1394    def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
1395        """Initialize MaxPoolV1."""
1396        self.init_prim_io_names(inputs=['x'], outputs=['output'])
1397        validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name)
1398        validator.check_value_type('strides', strides, [int, tuple], self.name)
1399        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
1400        self.pad_mode = validator.check_string(
1401            pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.name)
1402        self.add_prim_attr("pad_mode", self.pad_mode)
1403        self.format = validator.check_string(
1404            data_format, ['NCHW', 'NHWC'], 'format', self.name)
1405        self.add_prim_attr('data_format', self.format)
1406
1407        self.kernel_size = _check_positive_int_or_tuple(
1408            "kernel_size", kernel_size, self.name, allow_four=False, ret_four=True)
1409        self.strides = _check_positive_int_or_tuple(
1410            "strides", strides, self.name, allow_four=False, ret_four=True)
1411
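        # For NHWC the normalized (N, C, H, W) kernel_size/strides are reordered to
        # (N, H, W, C) before being registered as primitive attributes.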
1412        kernel_size_adapted = self.kernel_size if self.format == 'NCHW' else (
1413            self.kernel_size[0], self.kernel_size[2], self.kernel_size[3], self.kernel_size[1])
1414        strides_adapted = self.strides if self.format == 'NCHW' else (
1415            self.strides[0], self.strides[2], self.strides[3], self.strides[1])
1416
1417        self.add_prim_attr("kernel_size", kernel_size_adapted)
1418        self.add_prim_attr("strides", strides_adapted)
1419
1420
1421class MaxPool3D(Primitive):
1422    r"""
1423    Applies a 3D max pooling over an input Tensor which can be regarded as a composition of 3D planes.
1424
1425    Typically the input is of shape :math:`(N_{in}, C_{in}, D_{in}, H_{in}, W_{in})`, MaxPool outputs
1426    regional maximum in the :math:`(D_{in}, H_{in}, W_{in})`-dimension. Given kernel size
1427    :math:`ks = (d_{ker}, h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1, s_2)`, the operation is as follows:
1428
1429    .. math::
1430        \text{output}(N_i, C_j, d, h, w) =
1431        \max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
1432        \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)
1433
1434    Args:
1435        kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
1436            is an int number that represents depth, height and width of the kernel, or a tuple
1437            of three int numbers that represent depth, height and width respectively. Default: ``1`` .
1438        strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
1439            the depth, height and width of movement, or a tuple of three int numbers that
1440            represent depth, height and width of movement respectively. Default: ``1`` .
1441        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
1442            ``"SAME"`` , ``"VALID"`` or ``"PAD"`` . Default: ``"VALID"`` .
1443
1444            - ``"SAME"``: Pad the input around its depth/height/width dimension so that the shape of input and output
1445              are the same when `stride` is set to ``1``.
1446              The amount of padding is calculated by the operator internally. If the amount is even,
1447              it is uniformly distributed around the input; if it is odd, the excess amount goes
1448              to the front/right/bottom side.
1449              If this mode is set, `pad_list` must be 0.
1450            - ``"VALID"``: No padding is applied to the input, and the output returns the maximum
1451              possible depth, height and width. Extra pixels that could not complete a full stride will
1452              be discarded. If this mode is set, `pad_list` must be 0.
1453            - ``"PAD"``: Pad the input with a specified amount. In this mode, the amount of padding
1454              in the depth, height and width dimension is determined by the `pad_list` parameter.
1455              If this mode is set, `pad_list` must be greater than or equal to 0.
1456
1457        pad_list (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` . If `pad_list` is an integer,
1458            the paddings of head, tail, top, bottom, left and right are all the same and equal to `pad_list`. If
1459            `pad_list` is a tuple of six integers, the paddings of head, tail, top, bottom, left and right equal
1460            pad_list[0], pad_list[1], pad_list[2], pad_list[3], pad_list[4] and pad_list[5] correspondingly.
1461        ceil_mode (Union[bool, None]): Whether to use ceil instead of floor to calculate output shape.
1462            Only effective in ``"pad"`` mode.
1463            When `pad_mode` is ``"pad"`` and `ceil_mode` is ``None`` , `ceil_mode` will be set to ``False``.
1464            Default: ``None`` .
1465        data_format (str) : The optional value for data format. Currently only ``"NCDHW"`` is supported.
1466            Default: ``"NCDHW"`` .
1467
1468    Inputs:
1469        - **x** (Tensor) - Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`.
1470          Data type must be float16, float32 or float64.
1471
1472    Outputs:
1473        Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the data type of `x`.
1474
1475    Raises:
1476        TypeError: If `kernel_size` or `strides` is neither an int nor a tuple.
1477        TypeError: If `pad_mode` or `data_format` is not a string.
1478        ValueError: If numbers in `kernel_size` or `strides` are not positive.
1479        ValueError: If `pad_mode` is not one of ``"SAME"``, ``"VALID"`` or ``"PAD"``.
1480        ValueError: If `pad_mode` is ``"SAME"`` or ``"VALID"``, `ceil_mode` is not ``None``.
1481        ValueError: If `kernel_size` or `strides` is a tuple whose length is not equal to 3.
1482        ValueError: If `data_format` is not ``"NCDHW"``.
1483
1484    Supported Platforms:
1485        ``Ascend`` ``GPU`` ``CPU``
1486
1487    Examples:
1488        >>> import mindspore
1489        >>> import numpy as np
1490        >>> from mindspore import Tensor, ops
1491        >>> x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float32)
1492        >>> max_pool3d = ops.MaxPool3D(kernel_size=2, strides=1, pad_mode="VALID")
1493        >>> output = max_pool3d(x)
1494        >>> print(output)
1495        [[[[[10. 11.]]]
1496          [[[22. 23.]]]]]
1497    """
1498
1499    @prim_attr_register
1500    def __init__(self, kernel_size=1, strides=1, pad_mode="VALID", pad_list=0, ceil_mode=None, data_format="NCDHW"):
1501        """Initialize MaxPool3D."""
1502        self.init_prim_io_names(inputs=['x'], outputs=['output'])
1503        validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name)
1504        validator.check_value_type('strides', strides, [int, tuple], self.name)
1505        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
1506        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME', 'PAD'], 'pad_mode', self.name)
1507        if pad_mode.upper() == "PAD":
1508            self.pad_mode = "CALCULATED"
1509        self.add_prim_attr("pad_mode", self.pad_mode)
1510        self.data_format = validator.check_string(data_format, ['NCDHW'], 'data_format', self.name)
1511        self.kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.name, ret_five=True)
1512        self.add_prim_attr("kernel_size", self.kernel_size)
1513        self.strides = _check_3d_int_or_tuple("strides", strides, self.name, ret_five=True)
1514        self.add_prim_attr("strides", self.strides)
1515        if ceil_mode is None:
1516            self.ceil_mode = False
1517        else:
1518            self.ceil_mode = validator.check_value_type('ceil_mode', ceil_mode, [bool], self.name)
1519            if self.pad_mode != "CALCULATED":
1520                raise ValueError("When 'pad_mode' is 'same' or 'valid', 'ceil_mode' must be None.")
1521        self.add_prim_attr("ceil_mode", int(self.ceil_mode))
1522
1523        validator.check_value_type('pad_list', pad_list, (int, tuple), self.name)
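        # Normalize pad_list to a 6-tuple in (head, tail, top, bottom, left, right) order:
        # an int is broadcast to all six positions, and a 3-tuple is applied symmetrically
        # per dimension.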
1524        self.pad_list = pad_list
1525        if isinstance(self.pad_list, int):
1526            self.pad_list = (self.pad_list,) * 6
1527        if len(self.pad_list) == 3:
1528            self.pad_list = (pad_list[0], pad_list[0], pad_list[1], pad_list[1], pad_list[2], pad_list[2])
1529        if len(self.pad_list) != 3 and len(self.pad_list) != 6:
1530            raise ValueError(f"For '{self.name}', attr 'pad_list' must be a non-negative int number or a tuple of "
1531                             f"three or six non-negative int numbers, but got {len(self.pad_list)} numbers.")
1532        if self.pad_mode != 'CALCULATED' and self.pad_list != (0, 0, 0, 0, 0, 0):
1533            raise ValueError(f"For '{self.name}', the 'pad_list' must be zero or (0, 0, 0, 0, 0, 0) when 'pad_mode' "
1534                             f"is not \"pad\", but got 'pad_list' is {pad_list} and 'pad_mode' is {pad_mode}.")
1535        if self.pad_mode == 'CALCULATED':
1536            for item in self.pad_list:
1537                validator.check_non_negative_int(item, 'pad_list item', self.name)
1538        self.add_prim_attr("pad_list", self.pad_list)
1539
1540
1541class MaxUnpool2D(Primitive):
1542    r"""
1543    Calculates the partial inverse of MaxPool2D operation.
1544
1545    Since MaxPool2D loses non-maximal values, it is not fully invertible.
1546    Therefore, MaxUnpool2D takes the output of MaxPool2D, including the indices of
1547    the maximal values, and computes a partial inverse where all non-maximal values are set to zero.
1548    Typically the input is of shape :math:`(N, C, H_{in}, W_{in})` ,
1549    the output is of shape :math:`(N, C, H_{out}, W_{out})` , the operation is as follows:
1550
1551    .. math::
1552        \begin{array}{ll} \\
1553        H_{out} = (H_{in} - 1) \times strides[0] - 2 \times pads[0] + ksize[0] \\
1554        W_{out} = (W_{in} - 1) \times strides[1] - 2 \times pads[1] + ksize[1] \\
1555        \end{array}
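
    For example, with `ksize` of 1, `strides` of 1 and `pads` of 0 (the setting used in the example below),
    the formulas above give :math:`H_{out} = H_{in}` and :math:`W_{out} = W_{in}`, so the output keeps the
    spatial size of the input.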
1556
1557    .. warning::
1558        This is an experimental API that is subject to change or deletion.
1559
1560    Args:
1561        ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
1562            is an int number that represents height and width of the kernel, or a tuple
1563            of two int numbers that represent height and width respectively.
1564        strides (Union[int, tuple[int]], optional): The strides of kernel moving.
1565            If `strides` is 0 or (0, 0), then `strides` equal to `ksize` . Default: ``0`` .
1566
1567            - An int number that represents the height and width of movement are both `strides` .
1568            - A tuple of two int numbers that represent height and width of movement respectively.
1569
1570        pads (Union[int, tuple[int]], optional): The pad value to be filled. Default: ``0`` .
1571
1572            - If `pads` is an integer, the paddings of height and width are the same, equal to pads.
1573            - If `pads` is a tuple of two integers, the padding of height and width equal to pads[0]
1574              and pads[1] correspondingly.
1575
1576        output_shape (tuple[int], optional): The target output size is an optional input. Default: ``()`` .
1577
1578            - If :math:`output\_shape == ()` , then the shape of the output is computed by `ksize`, `strides` and `pads`.
1579            - If :math:`output\_shape != ()` , then `output_shape` must be :math:`(N, C, H, W)` or :math:`(N, H, W, C)`
1580              and `output_shape` must belong to :math:`[(N, C, H_{out} - strides[0], W_{out} - strides[1]),
1581              (N, C, H_{out} + strides[0], W_{out} + strides[1])]`.
1582
1583        data_format (str, optional): The optional value for data format.
1584            Currently ``"NCHW"`` and ``"NHWC"`` are supported. Default: ``"NCHW"`` .
1585
1586    Inputs:
1587        - **x** (Tensor) - The input Tensor to invert.
1588          Tensor of shape :math:`(N, C, H_{in}, W_{in})` or :math:`(N, H_{in}, W_{in}, C)`.
1589        - **argmax** (Tensor) - The indices of the maximal values.
1590          Its shape must be the same as that of the input `x`.
1591          Values of `argmax` must belong to :math:`[0, H_{in} \times W_{in} - 1]`.
1592          Data type must be int32 or int64.
1593
1594    Outputs:
1595        Tensor, with shape :math:`(N, C, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C)`.
1596        Has the same data type with `x`.
1597
1598    Raises:
1599        TypeError: If data type of `x` or `argmax` is not supported.
1600        TypeError: If `ksize`, `strides` or `pads` is neither int nor tuple.
1601        ValueError: If numbers in `strides` or `ksize` are not positive (`strides` may also be 0 or (0, 0)).
1602        ValueError: If numbers in `pads` are negative.
1603        ValueError: If `ksize`, `strides` or `pads` is a tuple whose length is not equal to 2.
1604        ValueError: If `data_format` is not a str or is neither `NCHW` nor `NHWC`.
1605        ValueError: If the length of `output_shape` is neither 0 nor 4.
1606        ValueError: If `output_shape` is not close to output size
1607                    computed by attr `ksize`, `strides` and `pads`.
1608
1609    Supported Platforms:
1610        ``Ascend`` ``GPU`` ``CPU``
1611
1612    Examples:
1613        >>> import numpy as np
1614        >>> from mindspore import Tensor, ops
1615        >>> x = Tensor(np.array([[[[0, 1], [8, 9]]]]).astype(np.float32))
1616        >>> argmax = Tensor(np.array([[[[0, 1], [2, 3]]]]).astype(np.int64))
1617        >>> maxunpool2d = ops.MaxUnpool2D(ksize=1, strides=1, pads=0)
1618        >>> output = maxunpool2d(x, argmax)
1619        >>> print(output.asnumpy())
1620        [[[[0. 1.]
1621           [8. 9.]]]]
1622    """
1623
1624    @prim_attr_register
1625    def __init__(self, ksize, strides=0, pads=0, output_shape=(), data_format="NCHW"):
1626        """Initialize MaxUnpool2D."""
1627        self.init_prim_io_names(inputs=['x', 'argmax'], outputs=['y'])
1628        self.ksize = _check_positive_int_or_tuple('ksize', ksize, self.name, ret_four=True)
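        # A `strides` of 0 or (0, 0) means "use `ksize` as the stride", as documented above.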
1629        if strides in (0, (0, 0)):
1630            strides = ksize
1631        self.strides = _check_positive_int_or_tuple('strides', strides, self.name, ret_four=True)
1632        self.pads = _check_positive_int_or_tuple('pads', pads, self.name, ret_four=True, strict_positive=False)
1633        self.data_format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'data_format', self.name)
1634
1635        if data_format == "NHWC":
1636            self.ksize = (self.ksize[0], self.ksize[2], self.ksize[3], self.ksize[1])
1637            self.strides = (self.strides[0], self.strides[2], self.strides[3], self.strides[1])
1638            self.pads = (self.pads[0], self.pads[2], self.pads[3], self.pads[1])
1639
1640        self.add_prim_attr('ksize', self.ksize)
1641        self.add_prim_attr('strides', self.strides)
1642        self.add_prim_attr('pads', self.pads)
1643
1644        validator.check_value_type("output_shape", output_shape, [tuple], self.name)
1645        self.output_shape = output_shape
1646
1647
1648class MaxUnpool3D(Primitive):
1649    r"""
1650    Computes the inverse of :class:`mindspore.ops.MaxPool3D`.
1651
1652    MaxUnpool3D keeps the maximal values and sets all non-maximal positions to zero.
1653    Typically the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`, the output is of
1654    shape :math:`(N, C, D_{out}, H_{out}, W_{out})`, the operation is as follows.
1655
1656    .. math::
1657        \begin{array}{ll} \\
1658        D_{out} = (D_{in} - 1) \times strides[0] - 2 \times pads[0] + ksize[0] \\
1659        H_{out} = (H_{in} - 1) \times strides[1] - 2 \times pads[1] + ksize[1] \\
1660        W_{out} = (W_{in} - 1) \times strides[2] - 2 \times pads[2] + ksize[2] \\
1661        \end{array}
1662
1663    .. warning::
1664        This is an experimental API that is subject to change or deletion.
1665
1666    Args:
1667        ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
1668            is an int number that represents depth, height and width of the kernel, or a tuple
1669            of three int numbers that represent depth, height and width respectively.
1670        strides (Union[int, tuple[int]], optional): The distance of kernel moving. Default: ``0`` .
1671
1672            - If it is an int number, the depth, height and width of movement are all equal to `strides`.
1673            - If it is a tuple of three int numbers, they represent depth, height and width of movement respectively.
1674            - If strides is 0 or (0, 0, 0), then `strides` equal to `ksize`.
1675
1676        pads (Union[int, tuple[int]], optional): The pad value to be filled. Default: ``0`` .
1677
1678            - If `pads` is an integer, the paddings of depth, height and width are the same, equal to pads.
1679            - If `pads` is a tuple of three integers, the padding of depth, height and width equal to pads[0],
1680              pads[1] and pads[2] correspondingly.
1681
1682        output_shape (tuple[int], optional) : The target output size. Default: ``()`` .
1683            If :math:`output\_shape == ()`, then the shape of the output is computed by `ksize`, `strides` and `pads` as shown above.
1684            If :math:`output\_shape != ()`, then output_shape format must be :math:`(N, C, D, H, W)` or
1685            :math:`(N, D, H, W, C)` and output_shape must be in range
1686            :math:`[(N, C, D_{out} - strides[0], H_{out} - strides[1], W_{out} - strides[2]),
1687            (N, C, D_{out} + strides[0], H_{out} + strides[1], W_{out} + strides[2])]`.
1688        data_format (str, optional) : The optional value for data format. Currently
1689            ``'NCDHW'`` and ``'NDHWC'`` are supported. Default: ``'NCDHW'`` .
1690
1691    Inputs:
1692        - **x** (Tensor) - The input Tensor to invert.
1693          Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(N, D_{in}, H_{in}, W_{in}, C)`.
1694        - **argmax** (Tensor) - Max values' index. Tensor that has the same shape as `x`.
1695          Values of `argmax` must be in range :math:`[0, D_{in} \times H_{in} \times W_{in} - 1]`.
1696          Data type must be int32 or int64.
1697
1698    Outputs:
1699        Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(N, D_{out}, H_{out}, W_{out}, C)`.
1700        Has the same data type with `x`.
1701
1702    Raises:
1703        TypeError: If data type of `x` or `argmax` is not supported.
1704        TypeError: If `ksize`, `strides` or `pads` is neither int nor tuple.
1705        ValueError: If numbers in `strides` or `ksize` are negative.
1706        ValueError: If numbers in `pads` are negative.
1707        ValueError: If `ksize`, `strides` or `pads` is a tuple whose length is not equal to 3.
1708        ValueError: If `data_format` is not a str or is neither ``'NCDHW'`` nor ``'NDHWC'``.
1709        ValueError: If the length of `output_shape` is neither 0 nor 5.
1710        ValueError: If `output_shape` is not close to output size range
1711                    computed by attr `ksize, strides, pads`.
1712
1713    Supported Platforms:
1714        ``Ascend`` ``GPU`` ``CPU``
1715
1716    Examples:
1717        >>> import numpy as np
1718        >>> from mindspore import Tensor, ops
1719        >>> x = Tensor(np.array([[[[[0, 1], [8, 9]]]]]).astype(np.float32))
1720        >>> argmax = Tensor(np.array([[[[[0, 1], [2, 3]]]]]).astype(np.int64))
1721        >>> maxunpool3d = ops.MaxUnpool3D(ksize=1, strides=1, pads=0)
1722        >>> output = maxunpool3d(x, argmax)
1723        >>> print(output.asnumpy())
1724        [[[[[0. 1.]
1725            [8. 9.]]]]]
1726    """
1727
1728    @prim_attr_register
1729    def __init__(self, ksize, strides=0, pads=0, output_shape=(), data_format="NCDHW"):
1730        """Initialize MaxUnpool3D."""
1731        self.init_prim_io_names(inputs=['x', 'argmax'], outputs=['y'])
1732        self.ksize = _check_3d_int_or_tuple('ksize', ksize, self.name, ret_five=True)
1733        if strides in (0, (0, 0, 0)):
1734            strides = ksize
1735        self.strides = _check_3d_int_or_tuple('strides', strides, self.name, ret_five=True)
1736        self.pads = _check_3d_int_or_tuple('pads', pads, self.name, ret_five=True, greater_zero=False)
1737        self.data_format = validator.check_string(data_format, ['NCDHW', 'NDHWC'], 'data_format', self.name)
1738        if data_format == "NDHWC":
1739            self.ksize = (self.ksize[0], self.ksize[2], self.ksize[3], self.ksize[4], self.ksize[1])
1740            self.strides = (self.strides[0], self.strides[2], self.strides[3], self.strides[4], self.strides[1])
1741            self.pads = (self.pads[0], self.pads[2], self.pads[3], self.pads[4], self.pads[1])
1742
1743        self.add_prim_attr('ksize', self.ksize)
1744        self.add_prim_attr('strides', self.strides)
1745        self.add_prim_attr('pads', self.pads)
1746
1747        validator.check_value_type("output_shape", output_shape, [tuple], self.name)
1748        self.output_shape = output_shape
1749
1750
1751class AvgPoolV1(Primitive):
1752    r"""
1753    Average-pooling operation.
1754
1755    Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D planes.
1756    Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, AvgPoolV1 outputs
1757    regional average in the :math:`(H_{in}, W_{in})`-dimension. Given window size
1758    :math:`ks = (h_{ker}, w_{ker})` and strides :math:`s = (s_0, s_1)`, the operation is as follows.
1759
1760    .. math::
1761        \text{output}(N_i, C_j, h, w) = \frac{1}{h_{ker} * w_{ker}} \sum_{m=0}^{h_{ker}-1} \sum_{n=0}^{w_{ker}-1}
1762        \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
1763
1764    .. warning::
1765        - Only single input and single output are supported.
1766        - Global average pooling is supported.
1767        - The height of "kernel_size" and the width of "kernel_size" are positive integers within the range [1, 255].
1768          ksize_h * ksize_w < 256.
1769        - Due to instruction restrictions, the values of "strides_h" and "strides_w" are
1770          positive integers within the range [1, 64).
1771
1772    Args:
1773        kernel_size (Union[int, tuple[int]]): The size of the kernel used to take the average value,
1774            is an integer that represents height and width of the kernel, or a tuple
1775            of two integers that represent height and width respectively. Default: ``1`` .
1776        strides (Union[int, tuple[int]]): The distance of kernel moving, an integer that represents
1777            the height and width of movement are both strides, or a tuple of two integers that
1778            represent height and width of movement, respectively. Default: ``1`` .
1779        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
1780            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .
1781
1782            - ``"same"``: Pad the input around its edges so that the shape of input and output
1783              are the same when `stride` is set to ``1``.
1784              The amount of padding is calculated by the operator internally. If the amount is even, it is
1785              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
1786            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
1787              possible height and width. Extra pixels that could not complete a full stride will
1788              be discarded.
1789
1790        data_format (str): The format of input and output data. Should be ``'NHWC'`` or ``'NCHW'`` .
1791            Default: ``'NCHW'`` .
1792
1793    Inputs:
1794        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
1795
1796    Outputs:
1797        Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
1798
1799    Raises:
1800        TypeError: If `kernel_size` or `strides` is neither int nor tuple.
1801        ValueError: If `pad_mode` is neither 'valid' nor 'same' (case insensitive).
1802        ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.
1803        ValueError: If `kernel_size` or `strides` is less than 1.
1804        ValueError: If length of shape of `x` is not equal to 4.
1805
1806    Supported Platforms:
1807        ``Ascend``
1808
1809    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
1810        >>> x = Tensor(np.arange(1 * 2 * 4 * 4).reshape((1, 2, 4, 4)), mindspore.float64)
1811        >>> avgpoolv1_op = ops.AvgPoolV1(pad_mode="VALID", kernel_size=3, strides=1)
1812        >>> _output = avgpoolv1_op(x)
1813        >>> print(_output)
1814        [[[[ 5.  6.]
1815           [ 9. 10.]]
1816          [[21. 22.]
1817           [25. 26.]]]]
1818    """
1819
1820    @prim_attr_register
1821    def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
1822        """Initialize AvgPoolV1."""
1823        self.init_prim_io_names(inputs=['x'], outputs=['output'])
1824        validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name)
1825        validator.check_value_type('strides', strides, [int, tuple], self.name)
1826        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
1827        self.pad_mode = validator.check_string(
1828            pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.name)
1829        self.add_prim_attr("pad_mode", self.pad_mode)
1830        self.format = validator.check_string(
1831            data_format, ['NCHW', 'NHWC'], 'format', self.name)
1832        self.add_prim_attr('data_format', self.format)
1833        self.kernel_size = _check_positive_int_or_tuple(
1834            "kernel_size", kernel_size, self.name, allow_four=False, ret_four=True)
1835        self.strides = _check_positive_int_or_tuple(
1836            "strides", strides, self.name, allow_four=False, ret_four=True)
1837
1838        # adapt data_format
1839        self.kernel_size_adapted = self.kernel_size if self.format == "NCHW" else (
1840            self.kernel_size[0], self.kernel_size[2], self.kernel_size[3], self.kernel_size[1])
1841        self.add_prim_attr("kernel_size", self.kernel_size_adapted)
1842        self.strides_adapted = self.strides if self.format == "NCHW" else (
1843            self.strides[0], self.strides[2], self.strides[3], self.strides[1])
1844        self.add_prim_attr("strides", self.strides_adapted)
1845
1846
1847class Conv2DBackpropInput(Primitive):
1848    r"""
1849    The Conv2DBackpropInput interface is deprecated. Please refer to :class:`mindspore.ops.Conv2DTranspose` if you
1850    want to do upsampling.
1851
1852    Supported Platforms:
1853        Deprecated
1854    """
1855    __mindspore_signature__ = (
1856        sig.make_sig('out_backprop', dtype=sig.sig_dtype.T),
1857        sig.make_sig('filter', dtype=sig.sig_dtype.T1),
1858        sig.make_sig('input_sizes', dtype=sig.sig_dtype.T2)
1859    )
1860
1861    @prim_attr_register
1862    def __init__(self,
1863                 out_channel,
1864                 kernel_size,
1865                 pad_mode="valid",
1866                 pad=0,
1867                 pad_list=None,
1868                 mode=1,
1869                 stride=1,
1870                 dilation=1,
1871                 group=1,
1872                 data_format="NCHW"):
1873        """Initialize Conv2DBackpropInput"""
1874        self.init_prim_io_names(inputs=['out_backprop', 'filter', 'input_sizes'], outputs=['output'])
1875        self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
1876        self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name)
1877        self.add_prim_attr('kernel_size', self.kernel_size)
1878        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
1879        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
1880            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
1881                             f"but got the 'data_format' is {self.format} and "
1882                             f"the platform is {context.get_context('device_target')}.")
1883        self.add_prim_attr('data_format', self.format)
1884        self.stride = _check_positive_int_or_tuple('stride', stride, self.name, allow_four=True, ret_four=True)
1885        self.stride = _update_attr_by_format(self.stride, self.format)
1886        self.add_prim_attr('stride', self.stride)
1887        self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name, allow_four=True, ret_four=True)
1888        self.dilation = _update_attr_by_format(self.dilation, self.format)
1889        self.add_prim_attr('dilation', self.dilation)
1890        validator.check_value_type('pad', pad, (int, tuple), self.name)
1891        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
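        # Normalize `pad` to a 4-tuple of (top, bottom, left, right) paddings.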
1892        if isinstance(pad, int):
1893            pad = (pad,) * 4
1894        else:
1895            validator.check_equal_int(len(pad), 4, 'pad size', self.name)
1896        self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name)
1897        if pad_mode != 'pad' and pad != (0, 0, 0, 0):
1898            raise ValueError(f"For '{self.name}', the 'pad' must be zero or (0, 0, 0, 0) when 'pad_mode' "
1899                             f"is not \"pad\", but got 'pad' is {self.pad} and 'pad_mode' is {pad_mode}.")
1900        self.add_prim_attr("pad", pad)
1901        self.padding = pad
1902        if self.pad_mode == 'pad':
1903            for item in pad:
1904                validator.check_non_negative_int(item, 'pad item', self.name)
1905
1906        pad_mode = pad_mode.upper()
1907        self.add_prim_attr('pad_mode', pad_mode)
1908        self.mode = validator.check_equal_int(mode, 1, 'mode', self.name)
1909        self.group = validator.check_positive_int(group, 'group', self.name)
1910        self.add_prim_attr('groups', self.group)
1911        if pad_list:
1912            for x in pad_list:
1913                if x != -1:
1914                    validator.check_non_negative_int(x, 'element of pad_list', self.name)
1915            self.pad_list = pad_list
1916
1917
1918class MaxPool3DWithArgmax(Primitive):
1919    r"""
1920    Performs a 3D max pooling on the input Tensor and returns both max values and indices.
1921
1922    Typically the input is a Tensor with shape :math:`(N_{in}, C_{in}, D_{in}, H_{in}, W_{in})`, outputs
1923    regional maximum in the :math:`(D_{in}, H_{in}, W_{in})`-dimension. Given `ksize`
1924    :math:`ks = (d_{ker}, h_{ker}, w_{ker})` and `strides` :math:`s = (s_0, s_1, s_2)`, the operation is as follows.
1925
1926    .. math::
1927        \text{output}(N_i, C_j, d, h, w) =
1928        \max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
1929        \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)
1930
1931    The output is a Tensor with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})` and its depth, height and
1932    width are:
1933
1934    .. math::
1935        \begin{array}{ll} \\
1936            D_{out} = \frac{D_{in} + 2 \times \text{pads}[0] - \text{dilation}[0] \times (\text{ksize}[0] - 1) - 1}
1937                {\text{stride}[0]} + 1 \\
1938            H_{out} = \frac{H_{in} + 2 \times \text{pads}[1] - \text{dilation}[1] \times (\text{ksize}[1] - 1) - 1}
1939                {\text{stride}[1]} + 1 \\
1940            W_{out} = \frac{W_{in} + 2 \times \text{pads}[2] - \text{dilation}[2] \times (\text{ksize}[2] - 1) - 1}
1941                {\text{stride}[2]} + 1 \\
1942        \end{array}
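
    For example (using the setting from the example below), with :math:`D_{in} = H_{in} = W_{in} = 2`,
    `ksize` of 2, `strides` of 1, `pads` of 1 and the default `dilation`, the formulas above give
    :math:`D_{out} = H_{out} = W_{out} = (2 + 2 - 1 - 1) / 1 + 1 = 3`, matching the printed output shape.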
1943
1944    .. warning::
1945        This is an experimental API that is subject to change or deletion.
1946
1947    Args:
1948        ksize (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg
1949            value, is an int number that represents depth, height and width of the kernel, or a tuple of
1950            three int numbers that represent depth, height and width respectively.
1951        strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents the depth,
1952            height and width of movement are both strides, or a tuple of three int numbers that
1953            represent depth, height and width of movement respectively.
1954        pads (Union[int, tuple[int]]): An int number that represents the padding of depth, height and width, or a
1955            tuple of three int numbers that represent the padding of depth, height and width respectively.
1956        dilation (Union[int, tuple[int]]): The spacing between kernel elements. Default: ``(1, 1, 1)`` .
1957        ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .
1958        data_format (str) : The optional value for data format. Currently only ``'NCDHW'`` is supported.
1959            Default: ``'NCDHW'`` .
1960        argmax_type (mindspore.dtype) : The dtype for argmax. Default: ``mstype.int64`` .
1961
1962    Inputs:
1963        - **x** (Tensor) - Tensor of shape :math:`(N_{in}, C_{in}, D_{in}, H_{in}, W_{in})` with data type of int8,
1964          int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
1965
1966    Outputs:
1967        Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.
1968
1969        - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})`.
1970          It has the same data type as `x`.
1971        - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int32 or int64.
1972
1973    Raises:
1974        TypeError: If `x` is not a Tensor.
1975        ValueError: If length of shape of `x` is not equal to 5.
1976        TypeError: If `ksize` , `strides` , `pads` or `dilation` is not int or tuple.
1977        ValueError: If `ksize` or `strides` is less than 1.
1978        ValueError: If `pads` is less than 0.
1979        ValueError: If `data_format` is not ``'NCDHW'``.
1980        ValueError: If `argmax_type` is not mindspore.int64 or mindspore.int32.
1981
1982    Supported Platforms:
1983        ``Ascend`` ``GPU`` ``CPU``
1984
1985    Examples:
1986        >>> import mindspore
1987        >>> import numpy as np
1988        >>> from mindspore import Tensor, ops
1989        >>> x = Tensor(np.arange(2 * 1 * 2 * 2 * 2).reshape((2, 1, 2, 2, 2)), mindspore.float32)
1990        >>> max_pool3d_with_arg_op = ops.MaxPool3DWithArgmax(ksize=2, strides=1, pads=1)
1991        >>> output_tensor, argmax = max_pool3d_with_arg_op(x)
1992        >>> print(output_tensor.shape)
1993        (2, 1, 3, 3, 3)
1994        >>> print(argmax.shape)
1995        (2, 1, 3, 3, 3)
1996    """
1997
1998    @prim_attr_register
1999    def __init__(self, ksize, strides, pads, dilation=(1, 1, 1), ceil_mode=False,
2000                 data_format='NCDHW', argmax_type=mstype.int64):
2001        """Initialize MaxPool3DWithArgmax."""
2002        self.init_prim_io_names(inputs=['x'], outputs=['y', 'argmax'])
2003        validator.check_value_type('ceil_mode', ceil_mode, bool, self.name)
2004        validator.check_value_type('data_format', data_format, str, self.name)
2005        validator.check_value_type("argmax_type", argmax_type, [mstype.Type], self.name)
2006        argmax_type_valid_values = (mstype.int32, mstype.int64)
2007        validator.check_type_name(
2008            "argmax_type", argmax_type, argmax_type_valid_values, self.name)
2009        self.data_format = validator.check_string(
2010            data_format, ['NCDHW'], 'data_format', self.name)
2011        if argmax_type == mstype.int32:
2012            self.add_prim_attr('argmax_type', 'int32')
2013        elif argmax_type == mstype.int64:
2014            self.add_prim_attr('argmax_type', 'int64')
2015        else:
2016            raise ValueError(f"For '{self.name}', the 'argmax_type' must be mstype.int32 or mstype.int64, "
2017                             f"but got {self.argmax_type}.")
2018        self.ksize = _check_3d_int_or_tuple("ksize", ksize, self.name, ret_five=False)
2019        self.add_prim_attr('ksize', self.ksize)
2020        self.strides = _check_3d_int_or_tuple("strides", strides, self.name, ret_five=False)
2021        self.add_prim_attr('strides', self.strides)
2022        self.pads = _check_3d_int_or_tuple("pads", pads, self.name, greater_zero=False, ret_five=False)
2023        self.add_prim_attr('pads', self.pads)
2024        self.dilation = _check_3d_int_or_tuple("dilation", dilation, self.name, allow_five=True, ret_five=False)
2025        self.add_prim_attr('dilation', self.dilation)
2026
2027
2028class Conv2DTranspose(Conv2DBackpropInput):
2029    """
2030    Calculates a 2D transposed convolution, which can be regarded as Conv2d for the gradient of the input.
2031    It is also called deconvolution, although it is not an actual deconvolution, because it cannot restore
2032    the original input data completely; it can, however, restore the shape of the original input.
2033
2034    Args:
2035        out_channel (int): The dimensionality of the output space.
2036        kernel_size (Union[int, tuple[int]]): The size of the convolution window.
2037        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
2038            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
2039
2040            - ``"same"``: Pad the input around its edges so that the shape of input and output
2041              are the same when `stride` is set to ``1``.
2042              The amount of padding is calculated by the operator internally. If the amount is even, it is
2043              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
2044              If this mode is set, `pad` must be 0.
2045            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
2046              possible height and width. Extra pixels that could not complete a full stride will
2047              be discarded. If this mode is set, `pad` must be 0.
2048            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
2049              in the height and width directions is determined by the `pad` parameter.
2050              If this mode is set, `pad` must be greater than or equal to 0.
2051
2052            Please refer to :class:`mindspore.nn.Conv2dTranspose` for more specifications about `pad_mode`.
2053        pad (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` . If `pad` is an integer, the paddings
2054                    of top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers,
2055                    the padding of top, bottom, left and right equal to pad[0], pad[1], pad[2], and pad[3]
2056                    correspondingly.
2057        pad_list (Union[tuple[int], None]): The pad list of four integers like (top, bottom, left, right). Default: ``None`` .
2058        mode (int): Modes for different convolutions. The value is currently not used. Default: ``1`` .
2059        stride (Union[int, tuple[int]]): The stride to be applied to the convolution filter. Default: ``1`` .
2060        dilation (Union[int, tuple[int]]): Specifies the dilation rate to be used for the dilated convolution.
2061            Default: ``1`` .
2062        group (int): Splits input into groups. Default: ``1`` .
2063        data_format (str): The format of input and output data. It should be ``'NHWC'`` or ``'NCHW'`` .
2064            Default is ``'NCHW'`` .
2065
2066    Inputs:
2067        - **dout** (Tensor) - the gradients with respect to the output of the convolution.
2068          The shape conforms to the default data_format :math:`(N, C_{out}, H_{out}, W_{out})`.
2069        - **weight** (Tensor) - Set size of kernel is :math:`(K_1, K_2)`, then the shape is
2070          :math:`(C_{out}, C_{in}, K_1, K_2)`.
2071        - **input_size** (Tensor) - A tuple describes the shape of the input which conforms to the format
2072          :math:`(N, C_{in}, H_{in}, W_{in})`.
2073
2074    Outputs:
2075        Tensor, the gradients with respect to the input of convolution. It has the same shape as the input.
2076
2077    Raises:
2078        TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
2079        TypeError: If `out_channel` or `group` is not an int.
2080        ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
2081        ValueError: If `pad_mode` is not one of ``'same'``, ``'valid'`` or ``'pad'``.
2082        ValueError: If `padding` is a tuple whose length is not equal to 4.
2083        ValueError: If `pad_mode` is not equal to ``'pad'`` and `pad` is not equal to (0, 0, 0, 0).
2084        ValueError: If `data_format` is neither ``'NCHW'`` nor ``'NHWC'``.
2085
2086    Supported Platforms:
2087        ``Ascend`` ``GPU`` ``CPU``
2088
2089    Examples:
2090        >>> import mindspore
2091        >>> import numpy as np
2092        >>> from mindspore import Tensor, ops
2093        >>> dout = Tensor(np.ones([10, 32, 30, 30]), mindspore.float32)
2094        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
2095        >>> x = Tensor(np.ones([10, 32, 32, 32]))
2096        >>> conv2d_transpose_input = ops.Conv2DTranspose(out_channel=32, kernel_size=3)
2097        >>> output = conv2d_transpose_input(dout, weight, ops.shape(x))
2098        >>> print(output.shape)
2099        (10, 32, 32, 32)
2100    """
2101
2102    @prim_attr_register
2103    def __init__(self, out_channel, kernel_size, pad_mode="valid", pad=0,
2104                 pad_list=None, mode=1, stride=1, dilation=1, group=1, data_format="NCHW"):
2105        """Initialize Conv2DTranspose."""
2106        super(Conv2DTranspose, self).__init__(out_channel, kernel_size, pad_mode, pad,
2107                                              pad_list, mode, stride, dilation, group, data_format)
2108
2109
2110class SoftmaxCrossEntropyWithLogits(Primitive):
2111    r"""
2112    Gets the softmax cross-entropy value between logits and labels with one-hot encoding.
2113
2114    The updating formulas of SoftmaxCrossEntropyWithLogits algorithm are as follows,
2115
2116    .. math::
2117        \begin{array}{ll} \\
2118            p_{ij} = softmax(X_{ij}) = \frac{\exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)} \\
2119            loss_{ij} = -\sum_j{Y_{ij} * ln(p_{ij})}
2120        \end{array}
2121
2122    where :math:`X` represents `logits`.
2123    :math:`Y` represents `label`.
2124    :math:`loss` represents `output`.
2125
2126    Inputs:
2127        - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32.
2128        - **labels** (Tensor) - Ground truth labels, with shape :math:`(N, C)`, has the same data type with `logits`.
2129
2130    Outputs:
2131        Tuple of 2 tensors (loss, dlogits): the `loss` has shape :math:`(N,)`,
2132        and `dlogits` has the same shape as `logits`.
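        `dlogits` is the gradient of the loss with respect to `logits`, i.e. :math:`p - Y`, which can be
        verified against the example below.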
2133
2134    Raises:
2135        TypeError: If dtype of `logits` or `labels` is neither float16 nor float32.
2136        TypeError: If `logits` or `labels` is not a Tensor.
2137        ValueError: If shape of `logits` is not the same as `labels`.
2138
2139    Supported Platforms:
2140        ``Ascend`` ``GPU`` ``CPU``
2141
2142    Examples:
2143        >>> import mindspore
2144        >>> from mindspore import Tensor, ops
2145        >>> logits = Tensor([[2, 4, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32)
2146        >>> labels = Tensor([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0]], mindspore.float32)
2147        >>> softmax_cross = ops.SoftmaxCrossEntropyWithLogits()
2148        >>> loss, dlogits = softmax_cross(logits, labels)
2149        >>> print(loss)
2150        [0.5899297  0.52374405]
2151        >>> print(dlogits)
2152        [[ 0.02760027  0.20393994  0.01015357  0.20393994 -0.44563377]
2153         [ 0.08015892  0.02948882  0.08015892 -0.4077012   0.21789455]]
2154    """
2155
2156    @prim_attr_register
2157    def __init__(self):
2158        pass
2159
2160
2161class SparseSoftmaxCrossEntropyWithLogits(Primitive):
2162    r"""
2163    Computes the softmax cross-entropy value between logits and sparse encoding labels.
2164
2165    Sets input logits as `X`, input label as `Y`, output as `loss`. Then,
2166
2167    .. math::
2168        \begin{array}{ll} \\
2169            p_{ij} = softmax(X_{ij}) = \frac{\exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)} \\
2170            loss_{ij} = \begin{cases} -ln(p_{ij}), &j = y_i \cr 0, & j \neq y_i \end{cases} \\
2171            loss = \frac{1}{N} \sum_{ij} loss_{ij}
2172        \end{array}
2173
2174    Args:
2175        is_grad (bool): If ``True`` , this operation returns the computed gradient. Default: ``False`` .
2176
2177    Inputs:
2178        - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32.
2179        - **labels** (Tensor) - Ground truth labels, with shape :math:`(N)`.
2180          Data type must be int32 or int64.
2181
2182    Outputs:
2183        Tensor, if `is_grad` is False, the output tensor is the value of loss which is a scalar tensor;
2184        if `is_grad` is ``True`` , the output tensor is the gradient of input with the same shape as `logits`.
2185
2186    Raises:
2187        TypeError: If `is_grad` is not a bool.
2188        TypeError: If dtype of `logits` is neither float16 nor float32.
2189        TypeError: If dtype of `labels` is neither int32 nor int64.
2190        ValueError: If :math:`logits.shape[0] != labels.shape[0]`.
2191
2192    Supported Platforms:
2193        ``GPU`` ``CPU``
2194
2195    Examples:
2196        >>> import mindspore
2197        >>> from mindspore import Tensor, ops
2198        >>> logits = Tensor([[2, 3, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32)
2199        >>> labels = Tensor([0, 1], mindspore.int32)
2200        >>> sparse_softmax_cross = ops.SparseSoftmaxCrossEntropyWithLogits()
2201        >>> loss = sparse_softmax_cross(logits, labels)
2202        >>> print(loss)
2203        3.4878292
2204        >>> sparse_softmax_cross_grad = ops.SparseSoftmaxCrossEntropyWithLogits(is_grad=True)
2205        >>> loss_grad = sparse_softmax_cross_grad(logits, labels)
2206        >>> print(loss_grad)
2207        [[-0.48415753  0.04306427  0.00582811  0.11706084  0.3182043 ]
2208         [ 0.04007946 -0.4852556   0.04007946  0.2961494   0.10894729]]
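        >>> # Illustrative NumPy cross-check (a sketch): the scalar loss is the batch mean of the
        >>> # per-sample negative log-probabilities of the target classes.
        >>> import numpy as np
        >>> probs = np.exp(logits.asnumpy())
        >>> probs = probs / probs.sum(axis=1, keepdims=True)
        >>> ref = -np.log(probs[np.arange(2), labels.asnumpy()]).mean()
        >>> print(np.allclose(ref, loss.asnumpy(), rtol=1e-4))
        True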
2209    """
2210
2211    @prim_attr_register
2212    def __init__(self, is_grad=False):
2213        """Initialize SparseSoftmaxCrossEntropyWithLogits."""
2214        validator.check_value_type('is_grad', is_grad, [bool], self.name)
2215        self.init_prim_io_names(inputs=['features', 'labels'], outputs=['output'])
2216        self.is_grad = is_grad
2217        self.add_prim_attr('sens', 1.0)
2218
2219
2220class SparseSoftmaxCrossEntropyWithLogitsV2(Primitive):
2221    r"""
2222    Computes the softmax cross-entropy value between logits and sparse encoding labels.
2223
2224    Sets input logits as `X`, input label as `Y`, output as `loss`. Then,
2225
2226    .. math::
2227        \begin{array}{ll} \\
            p_{ij} = softmax(X_{ij}) = \frac{\exp(X_{ij})}{\sum_{k = 0}^{C-1}\exp(X_{ik})} \\
            loss_{ij} = \begin{cases} -\ln(p_{ij}), &j = y_i \cr 0, & j \neq y_i \end{cases}
2230        \end{array}
2231
2232    Inputs:
2233        - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32.
2234        - **labels** (Tensor) - Ground truth labels, with shape :math:`(N)`.
2235          Data type must be int32 or int64.
2236
2237    Outputs:
2238        - **loss** (Tensor) - With the same shape as `labels`, the same type as `logits`.
2239        - **backprop** (Tensor) - With the same shape and same type as `logits`.
2240
2241    Raises:
2242        TypeError: If dtype of `logits` is neither float16 nor float32.
2243        TypeError: If dtype of `labels` is neither int32 nor int64.
        ValueError: If the shape of `logits` is not :math:`(N, C)` or the shape of `labels` is not :math:`(N,)`.
2245
2246    Supported Platforms:
2247        ``Ascend`` ``CPU``
2248
2249    Examples:
        >>> import mindspore
        >>> from mindspore import Tensor, ops
        >>> logits = Tensor([[2, 3, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32)
2251        >>> labels = Tensor([0, 1], mindspore.int32)
2252        >>> sparse_softmax_cross = ops.SparseSoftmaxCrossEntropyWithLogitsV2()
2253        >>> loss, backprop = sparse_softmax_cross(logits, labels)
2254        >>> print(loss)
2255        [3.4519143 3.523744 ]
2256        >>> print(backprop)
2257        [[-0.96831506  0.08612854  0.01165623  0.23412165  0.6364086 ]
2258         [ 0.08015893 -0.9705112   0.08015893  0.5922988   0.21789455]]
2259    """
2260
2261    @prim_attr_register
2262    def __init__(self):
2263        """Initialize SparseSoftmaxCrossEntropyWithLogitsV2."""
2264        self.init_prim_io_names(inputs=['features', 'labels'], outputs=['loss', 'backprop'])
2265
2266
2267class ApplyMomentum(Primitive):
2268    r"""
2269    Optimizer that implements the Momentum algorithm.
2270
2271    Refer to the paper `On the importance of initialization and momentum in deep
2272    learning <https://dl.acm.org/doi/10.5555/3042817.3043064>`_  for more details.
2273
2274    Inputs of `variable`, `accumulation` and `gradient` comply with the implicit type conversion rules
2275    to make the data types consistent.
2276    If they have different data types, the lower priority data type will be converted to
2277    the relatively highest priority data type.
2278
2279    Refer to :class:`mindspore.nn.Momentum` for more details about the formula and usage.
2280
2281    Args:
2282        use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
2283                            from being updated. Default: ``False`` .
2284        use_nesterov (bool): Enable Nesterov momentum. Default: ``False`` .
2285        gradient_scale (float): The scale of the gradient. Default: ``1.0`` .
2286
2287    Inputs:
2288        - **variable** (Parameter) - Weights to be updated. Data type must be float64, int64, float, float16,
2289          int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128.
2290        - **accumulation** (Parameter) - Accumulated gradient value by moment weight,
2291          has the same data type with `variable`.
2292        - **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a float64, int64, float,
2293          float16, int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128 number or
2294          a scalar tensor with float64, int64, float, float16, int16, int32, int8, uint16, uint32, uint64, uint8,
2295          complex64, complex128 data type.
2296        - **gradient** (Tensor) - Gradient, has the same data type as `variable`.
2297        - **momentum** (Union[Number, Tensor]) - Momentum, must be a float64, int64, float, float16, int16, int32,
2298          int8, uint16, uint32, uint64, uint8, complex64, complex128 number or
2299          a scalar tensor with float64, int64, float, float16, int16, int32, int8, uint16, uint32, uint64, uint8,
2300          complex64, complex128 data type.
2301
2302    Outputs:
2303        Tensor, parameters to be updated.
2304
2305    Raises:
        TypeError: If `use_locking` or `use_nesterov` is not a bool, or `gradient_scale` is not a float.
        TypeError: If the data types of `variable`, `accumulation` and `gradient` do not support
                   implicit type conversion.
2308
2309    Supported Platforms:
2310        ``Ascend`` ``GPU`` ``CPU``
2311
2312    Examples:
2313        >>> import mindspore
2314        >>> import numpy as np
2315        >>> from mindspore import Tensor, nn, ops, Parameter
2316        >>> class Net(nn.Cell):
2317        ...    def __init__(self):
2318        ...        super(Net, self).__init__()
2319        ...        self.apply_momentum = ops.ApplyMomentum()
2320        ...        self.variable = Parameter(Tensor(np.array([[0.6, 0.4],
2321        ...                                            [0.1, 0.5]]).astype(np.float32)), name="variable")
2322        ...        self.accumulate = Parameter(Tensor(np.array([[0.6, 0.5],
2323        ...                                            [0.2, 0.6]]).astype(np.float32)), name="accumulate")
2324        ...    def construct(self, lr, grad, moment):
2325        ...        out = self.apply_momentum(self.variable, self.accumulate, lr, grad, moment)
2326        ...        return out
2327        >>> net = Net()
2328        >>> lr = Tensor(0.1, mindspore.float32)
2329        >>> moment = Tensor(0.9, mindspore.float32)
2330        >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
2331        >>> output = net(lr, grad, moment)
2332        >>> print(output)
2333        [[0.51600003 0.285     ]
2334        [0.072      0.366     ]]
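        >>> # Illustrative NumPy sketch of the update performed above, assuming the non-Nesterov
        >>> # rule: accumulate <- momentum * accumulate + grad, variable <- variable - lr * accumulate.
        >>> var0 = np.array([[0.6, 0.4], [0.1, 0.5]], np.float32)
        >>> accum0 = np.array([[0.6, 0.5], [0.2, 0.6]], np.float32)
        >>> accum1 = 0.9 * accum0 + grad.asnumpy()
        >>> print(np.allclose(var0 - 0.1 * accum1, output.asnumpy(), atol=1e-6))
        True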
2335    """
2336    __mindspore_signature__ = (
2337        sig.make_sig('variable', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
2338        sig.make_sig('accumulation', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
2339        sig.make_sig('learning_rate', dtype=sig.sig_dtype.T1),
2340        sig.make_sig('gradient', dtype=sig.sig_dtype.T),
2341        sig.make_sig('momentum', dtype=sig.sig_dtype.T2)
2342    )
2343
2344    @prim_attr_register
2345    def __init__(self, use_nesterov=False, use_locking=False, gradient_scale=1.0):
2346        """Initialize ApplyMomentum."""
2347        self.use_nesterov = validator.check_bool(use_nesterov, "use_nesterov", self.name)
2348        self.use_locking = validator.check_bool(use_locking, "use_locking", self.name)
2349        validator.check_value_type('gradient_scale', gradient_scale, [float], self.name)
2350        self.init_prim_io_names(inputs=['variable', 'accumulation', 'learning_rate', 'gradient', 'momentum'],
2351                                outputs=['output'])
2352        self.add_prim_attr('side_effect_mem', True)
2353
2354
2355class SmoothL1Loss(Primitive):
2356    r"""
    Calculates the smooth L1 loss, which is robust to outliers.
2358
2359    Refer to :func:`mindspore.ops.smooth_l1_loss` for more details.
2360
2361    Args:
        beta (float, optional): A parameter used to control the point where the function changes between
            L1 and L2 loss. The value should be greater than zero. Default: ``1.0`` .
2364        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2365            ``'sum'`` . Default: ``'none'`` .
2366
2367            - ``'none'``: no reduction will be applied.
2368            - ``'mean'``: compute and return the mean of elements in the output.
2369            - ``'sum'``: the output elements will be summed.
2370
2371    Inputs:
2372        - **logits** (Tensor) - Input Tensor of any dimension. Data type must be float16, float32 or float64.
2373        - **labels** (Tensor) - Ground truth data, has the same shape and dtype as the `logits`.
2374
2375    Outputs:
2376        Tensor, loss float tensor, same shape and dtype as the `logits`.
2377
2378    Supported Platforms:
2379        ``Ascend`` ``GPU`` ``CPU``
2380
2381    Examples:
2382        >>> import mindspore
2383        >>> import numpy as np
2384        >>> from mindspore import Tensor, ops
2385        >>> loss = ops.SmoothL1Loss()
2386        >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
2387        >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
2388        >>> output = loss(logits, labels)
2389        >>> print(output)
2390        [0.  0.  0.5]
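        >>> # Illustrative NumPy cross-check of the element-wise definition, assuming the default
        >>> # beta=1.0: 0.5 * d ** 2 / beta if abs(d) < beta else abs(d) - 0.5 * beta.
        >>> diff = np.abs(logits.asnumpy() - labels.asnumpy())
        >>> ref = np.where(diff < 1.0, 0.5 * diff ** 2, diff - 0.5)
        >>> print(np.allclose(ref, output.asnumpy()))
        True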
2391    """
2392
2393    @prim_attr_register
2394    def __init__(self, beta=1.0, reduction='none'):
2395        """Initialize SmoothL1Loss."""
2396        validator.check_value_type('beta', beta, [float], self.name)
2397        validator.check('beta', beta, '', 0, validator.GT, self.name)
2398        validator.check_string(
2399            reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
2400        self.add_prim_attr('sigma', self.beta)
2401        self.init_prim_io_names(inputs=['prediction', 'target'], outputs=['output'])
2402
2403
2404class MultiMarginLoss(Primitive):
2405    r"""
2406    Creates a loss function that minimizes the hinge loss
2407    for multi-class classification tasks.
    The loss is calculated from the input scores `inputs` and the ground truth labels `target`.
2409
2410    .. warning::
2411        This is an experimental API that is subject to change or deletion.
2412
2413    Refer to :func:`mindspore.ops.multi_margin_loss` for more details.
2414
2415    Args:
2416        p (int, optional): The norm degree for pairwise distance. Should be 1 or 2. Default: ``1`` .
        margin (float, optional): A parameter to change pairwise distance. Default: ``1.0`` .
2418        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2419            ``'sum'`` . Default: ``'mean'`` .
2420
2421            - ``'none'``: no reduction will be applied.
2422            - ``'mean'``: compute and return the weighted mean of elements in the output.
2423            - ``'sum'``: the output elements will be summed.
2424
2425    Inputs:
        - **inputs** (Tensor) - Input, with shape :math:`(N, C)`. Data type only supports float32, float16
          or float64.
        - **target** (Tensor) - Ground truth labels, with shape :math:`(N,)`. Data type only supports int64. The
          value of `target` should be non-negative and less than C.
        - **weight** (Tensor, optional) - The rescaling weight to each class with shape :math:`(C,)`. Data type only
          supports float16, float32 or float64.
2432
2433    Outputs:
        Tensor. When `reduction` is ``'none'``, the shape is :math:`(N,)`.
        Otherwise, it is a scalar. Has the same data type as `inputs`.
2436
2437    Supported Platforms:
2438        ``Ascend`` ``GPU`` ``CPU``
2439
2440    Examples:
2441        >>> import mindspore
2442        >>> import numpy as np
2443        >>> from mindspore import Tensor, ops
2444        >>> x = Tensor(np.ones(shape=[3, 3]), mindspore.float32)
2445        >>> target = Tensor(np.array([1, 2, 1]), mindspore.int64)
2446        >>> weight = Tensor(np.array([1, 1, 1]), mindspore.float32)
2447        >>> loss = ops.MultiMarginLoss()
2448        >>> output = loss(x, target, weight)
2449        >>> print(output)
2450        0.6666667
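        >>> # Illustrative check (p=1, margin=1.0): with all-ones input, each of the C-1=2
        >>> # non-target classes contributes max(0, margin - 1 + 1) / C = 1/3 per sample.
        >>> print(np.allclose(output.asnumpy(), 2.0 / 3.0, atol=1e-6))
        True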
2451    """
2452    __mindspore_signature__ = (
2453        sig.make_sig('x'),
2454        sig.make_sig('target'),
2455        sig.make_sig('weight', default=None)
2456    )
2457
2458    @prim_attr_register
2459    def __init__(self, p=1, margin=1.0, reduction="mean"):
2460        """Initialize MultiMarginLoss"""
2461        self.p = validator.check_value_type('p', p, [int], self.name)
2462        validator.check_int(p, {1, 2}, validator.IN, 'p', self.name)
2463        self.margin = validator.check_value_type('margin', margin, [float], self.name)
2464        self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
2465        self.init_prim_io_names(inputs=['x', 'target', 'weight'], outputs=['y'])
2466
2467    def __call__(self, x, target, weight=None):
2468        return super().__call__(x, target, weight)
2469
2470
2471class SoftMarginLoss(Primitive):
2472    r"""
2473    SoftMarginLoss operation.
2474
2475    Creates a criterion that optimizes a two-class classification
2476    logistic loss between input tensor :math:`x` and target tensor :math:`y`
2477    (containing 1 or -1).
2478
2479    .. math::
2480        \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
2481
2482    where :math:`x.nelement()` is the number of elements of x.
2483
2484    Args:
2485        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2486            ``'sum'`` . Default: ``'mean'`` .
2487
2488            - ``'none'``: no reduction will be applied.
2489            - ``'mean'``: compute and return the mean of elements in the output.
2490            - ``'sum'``: the output elements will be summed.
2491
2492    Inputs:
2493        - **logits** (Tensor) - Predict data. Data type must be float16 or float32.
2494        - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`.
2495
2496    Outputs:
2497        Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `logits`.
2498        Otherwise, a scalar value will be returned.
2499
2500    Raises:
2501        TypeError: If `logits` or `labels` is not a Tensor.
2502        TypeError: If dtype of `logits` or `labels` is neither float16 nor float32.
2503        ValueError: If shape of `logits` is not the same as `labels`.
2504        ValueError: If `reduction` is not one of ``"none"`` , ``"mean"`` or ``"sum"`` .
2505
2506    Supported Platforms:
2507        ``Ascend`` ``GPU``
2508
2509    Examples:
2510        >>> import mindspore
2511        >>> import numpy as np
2512        >>> from mindspore import Tensor, ops
2513        >>> loss = ops.SoftMarginLoss()
2514        >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
2515        >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
2516        >>> output = loss(logits, labels)
2517        >>> print(output)
2518        0.6764238
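        >>> # Illustrative NumPy cross-check of the formula above (default reduction='mean'):
        >>> ref = np.log1p(np.exp(-labels.asnumpy() * logits.asnumpy())).mean()
        >>> print(np.allclose(ref, output.asnumpy()))
        True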
2519    """
2520
2521    @prim_attr_register
2522    def __init__(self, reduction="mean"):
2523        """Initialize SoftMarginLoss"""
2524        self.init_prim_io_names(inputs=['predict', 'label'], outputs=['loss'])
2525        self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
2526
2527
2528class L2Loss(Primitive):
2529    r"""
    Calculates half of the squared L2 norm of the input, without taking the square root.

    Set the input as :math:`x` and the output as :math:`loss`.
2533
2534    .. math::
2535        loss = \frac{\sum x ^ 2}{2}
2536
2537    Inputs:
2538        - **input_x** (Tensor) - Tensor for computing the L2 norm. Data type must be float16, float32 or float64.
2539
2540    Outputs:
        Tensor, a scalar Tensor with the same data type as `input_x`.
2542
2543    Raises:
2544        TypeError: If `input_x` is not a Tensor.
2545        TypeError: If dtype of `input_x` is not float16, float32 or float64.
2546
2547    Supported Platforms:
2548        ``Ascend`` ``GPU`` ``CPU``
2549
2550    Examples:
2551        >>> import mindspore
2552        >>> import numpy as np
2553        >>> from mindspore import Tensor, ops
2554        >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.float16)
2555        >>> l2_loss = ops.L2Loss()
2556        >>> output = l2_loss(input_x)
2557        >>> print(output)
2558        7.0
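        >>> # Illustrative check: 0.5 * (1**2 + 2**2 + 3**2) = 7.0
        >>> ref = np.sum(input_x.asnumpy().astype(np.float32) ** 2) / 2
        >>> print(np.allclose(ref, output.asnumpy()))
        True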
2559    """
2560
2561    @prim_attr_register
2562    def __init__(self):
2563        """Initialize L2Loss"""
2564
2565
2566class DataFormatDimMap(Primitive):
2567    """
    Returns the dimension index in the destination data format given the one in the source data format.
2569
2570    Args:
2571        src_format (str): An optional value for source data format. The format can be ``'NHWC'`` and ``'NCHW'`` .
2572            Default: ``'NHWC'`` .
2573        dst_format (str): An optional value for destination data format. The format can be ``'NHWC'`` and ``'NCHW'`` .
2574            Default: ``'NCHW'`` .
2575
2576    Inputs:
2577        - **input_x** (Tensor) - A Tensor, each element is used as a dimension index of the source data format.
2578          The suggested values are in the range [-4, 4). Only supports int32.
2579
2580    Outputs:
2581        Tensor, Return the dimension index in the given target data format,
2582        has the same data type and shape as the `input_x`.
2583
2584    Raises:
2585        TypeError: If `src_format` or `dst_format` is not a str.
        TypeError: If `input_x` is not a Tensor or its dtype is not int32.
2587
2588    Supported Platforms:
2589        ``Ascend`` ``GPU`` ``CPU``
2590
2591    Examples:
2592        >>> import mindspore
2593        >>> from mindspore import Tensor, ops
2594        >>> input_x = Tensor([0, 1, 2, 3], mindspore.int32)
2595        >>> dfdm = ops.DataFormatDimMap()
2596        >>> output = dfdm(input_x)
2597        >>> print(output)
2598        [0 3 1 2]
2599    """
2600
2601    @prim_attr_register
2602    def __init__(self, src_format='NHWC', dst_format='NCHW'):
2603        """Initialize DataFormatDimMap."""
2604        valid_values = ['NHWC', 'NCHW']
2605        self.src_format = validator.check_string(src_format, valid_values, "src_format", self.name)
2606        self.dst_format = validator.check_string(dst_format, valid_values, "dst_format", self.name)
2607        self.init_prim_io_names(inputs=['input_x'], outputs=['output'])
2608
2609
2610class RNNTLoss(PrimitiveWithInfer):
2611    """
2612    Computes the RNNTLoss and its gradient with respect to the softmax outputs.
2613
2614    Args:
2615        blank_label (int): blank label. Default: ``0`` .
2616
2617    Inputs:
2618        - **acts** (Tensor) - Tensor of shape :math:`(B, T, U, V)`, where :math:`B` is batch,
2619          :math:`T` is sequence length, :math:`U` is label length and :math:`V` is output dim.
2620          Data type must be float16 or float32.
2621        - **labels** (Tensor) - Tensor of shape :math:`(B, U-1)`. Data type is int32.
2622        - **input_lengths** (Tensor) - Tensor of shape :math:`(B,)`. Data type is int32.
2623        - **label_lengths** (Tensor) - Tensor of shape :math:`(B,)`. Data type is int32.
2624
2625    Outputs:
        - **costs** (Tensor) - Tensor of shape :math:`(B,)`, with the same dtype as `acts`.
2627        - **grads** (Tensor) - Has the same shape and dtype as `acts`.
2628
2629    Raises:
2630        TypeError: If `acts`, `labels`, `input_lengths` or `label_lengths` is not a Tensor.
2631        TypeError: If dtype of `acts` is neither float16 nor float32.
2632        TypeError: If dtype of `labels`, `input_lengths` or `label_lengths` is not int32.
2633
2634    Supported Platforms:
2635        ``Ascend``
2636
2637    Examples:
2638        >>> import numpy as np
2639        >>> from mindspore import ops, Tensor
2640        >>> B, T, U, V = 1, 2, 3, 5
2641        >>> blank = 0
2642        >>> acts = np.random.random((B, T, U, V)).astype(np.float32)
2643        >>> labels = np.array([[1, 2]]).astype(np.int32)
2644        >>> input_length = np.array([T] * B).astype(np.int32)
2645        >>> label_length = np.array([len(l) for l in labels]).astype(np.int32)
2646        >>> rnnt_loss = ops.RNNTLoss(blank_label=0)
2647        >>> costs, grads = rnnt_loss(Tensor(acts), Tensor(labels), Tensor(input_length), Tensor(label_length))
2648        >>> print(costs.shape)
2649        (1,)
2650        >>> print(grads.shape)
2651        (1, 2, 3, 5)
2652    """
2653
2654    @prim_attr_register
2655    def __init__(self, blank_label=0):
2656        """Initialize RNNTLoss."""
2657        validator.check_value_type('blank_label', blank_label, [int], self.name)
2658        self.init_prim_io_names(inputs=['acts', 'labels', 'input_length', 'label_length'],
2659                                outputs=['costs', 'grads'])
2660
2661    def infer_shape(self, acts_shape, labels_shape, input_length_shape, label_length_shape):
2662        validator.check_equal_int(len(acts_shape), 4, 'acts_rank', self.name)
2663        validator.check_equal_int(len(labels_shape), 2, 'labels_rank', self.name)
2664        validator.check_equal_int(len(input_length_shape), 1, 'input_length_rank', self.name)
2665        validator.check_equal_int(len(label_length_shape), 1, 'label_length_rank', self.name)
2666        validator.check('labels shape[0]', labels_shape[0], 'acts shape[0]', acts_shape[0], validator.EQ, self.name)
2667        validator.check('labels shape[1]', labels_shape[1], 'acts shape[2]-1',
2668                        acts_shape[2] - 1, validator.EQ, self.name)
2669        validator.check('input_length size', input_length_shape[0], 'acts shape[0]',
2670                        acts_shape[0], validator.EQ, self.name)
2671        validator.check('label_length size', label_length_shape[0], 'acts shape[0]',
2672                        acts_shape[0], validator.EQ, self.name)
2673        costs_shape = (acts_shape[0],)
2674        return costs_shape, acts_shape
2675
2676    def infer_dtype(self, acts_type, labels_type, input_length_type, label_length_type):
2677        validator.check_tensor_dtype_valid("acts_type", acts_type, [mstype.float32, mstype.float16], self.name)
2678        tuple(map(partial(validator.check_tensor_dtype_valid,
2679                          valid_dtypes=(mstype.int32,), prim_name=self.name),
2680                  ("labels", "input_length", "label_length"),
2681                  (labels_type, input_length_type, label_length_type)))
2682        return acts_type, acts_type
2683
2684
2685class SGD(PrimitiveWithCheck):
2686    """
2687    Computes the stochastic gradient descent. Momentum is optional.
2688
2689    Nesterov momentum is based on the formula from paper `On the importance of
2690    initialization and momentum in deep learning <http://proceedings.mlr.press/v28/sutskever13.html>`_.
2691
2692    Note:
        If parameters are not grouped, the `weight_decay` in the optimizer will be applied to the network
        parameters whose names contain neither 'beta' nor 'gamma'. Users can group parameters to change the
        weight decay strategy. When parameters are grouped, each group can set its own `weight_decay`; if it
        is not set, the `weight_decay` in the optimizer will be applied.
2697        For more details, please refer to :class:`mindspore.nn.SGD`.
2698
2699    Args:
2700        dampening (float): The dampening for momentum. Default: ``0.0`` .
2701        weight_decay (float): Weight decay (L2 penalty). Default: ``0.0`` .
2702        nesterov (bool): Enable Nesterov momentum. Default: ``False`` .
2703
2704    Inputs:
2705        - **parameters** (Tensor) - Parameters to be updated. With float16 or float32 data type.
2706        - **gradient** (Tensor) - Gradient, with float16 or float32 data type.
2707        - **learning_rate** (Tensor) - Learning rate, a scalar tensor with float16 or float32 data type.
2708          e.g. Tensor(0.1, mindspore.float32)
2709        - **accum** (Tensor) - Accum(velocity) to be updated. With float16 or float32 data type.
2710        - **momentum** (Tensor) - Momentum, a scalar tensor with float16 or float32 data type.
2711          e.g. Tensor(0.1, mindspore.float32).
2712        - **stat** (Tensor) - States to be updated with the same shape as gradient, with float16 or float32 data type.
2713
2714    Outputs:
2715        Tensor, parameters to be updated.
2716
2717    Raises:
2718        TypeError: If `dampening` or `weight_decay` is not a float.
2719        TypeError: If `nesterov` is not a bool.
2720        TypeError: If `parameters`, `gradient`, `learning_rate`, `accum`, `momentum` or `stat` is not a Tensor.
2721        TypeError: If dtype of `parameters`, `gradient`, `learning_rate`, `accum`, `momentum` or `stat` is neither
2722                   float16 nor float32.
2723
2724    Supported Platforms:
2725        ``Ascend`` ``GPU`` ``CPU``
2726
2727    Examples:
2728        >>> import mindspore
2729        >>> import numpy as np
2730        >>> from mindspore import Tensor, ops
2731        >>> sgd = ops.SGD()
2732        >>> parameters = Tensor(np.array([2, -0.5, 1.7, 4]), mindspore.float32)
2733        >>> gradient = Tensor(np.array([1, -1, 0.5, 2]), mindspore.float32)
2734        >>> learning_rate = Tensor(0.01, mindspore.float32)
2735        >>> accum = Tensor(np.array([0.1, 0.3, -0.2, -0.1]), mindspore.float32)
2736        >>> momentum = Tensor(0.1, mindspore.float32)
2737        >>> stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mindspore.float32)
2738        >>> output = sgd(parameters, gradient, learning_rate, accum, momentum, stat)
2739        >>> print(output.asnumpy())
2740        [1.99 -0.4903 1.695 3.9801]
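        >>> # Illustrative NumPy sketch of the update observed above (assumption: stat > 0 marks a
        >>> # first step where accum is reset to the gradient; otherwise
        >>> # accum <- momentum * accum + gradient, then parameters <- parameters - lr * accum).
        >>> p0 = np.array([2, -0.5, 1.7, 4], np.float32)
        >>> a0 = np.array([0.1, 0.3, -0.2, -0.1], np.float32)
        >>> g0 = np.array([1, -1, 0.5, 2], np.float32)
        >>> s0 = np.array([1.5, -0.3, 0.2, -0.7], np.float32)
        >>> acc = np.where(s0 > 0, g0, 0.1 * a0 + g0)
        >>> print(np.allclose(p0 - 0.01 * acc, output.asnumpy(), atol=1e-4))
        True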
2741    """
2742
2743    @prim_attr_register
2744    def __init__(self, dampening=0.0, weight_decay=0.0, nesterov=False):
2745        """Initialize SGD."""
2746        validator.check_value_type("nesterov", nesterov, [bool], self.name)
2747        if nesterov and dampening != 0:
2748            raise ValueError(f"For '{self.name}', the 'dampening' must be 0 when 'nesterov' is True, "
2749                             f"but got 'dampening' is {dampening} and 'nesterov' is {nesterov}.")
2750        self.init_prim_io_names(inputs=['parameters', 'gradient', 'learning_rate', 'accum', 'momentum', 'stat'],
2751                                outputs=['output'])
2752        self.add_prim_attr('side_effect_mem', True)
2753
2754    def check_shape(self, parameters_shape, gradient_shape, learning_rate_shape,
2755                    accum_shape, momentum_shape, stat_shape):
        validator.check_int(len(gradient_shape), 0, validator.GE, 'gradient rank', self.name)
        validator.check_int(len(learning_rate_shape), 0, validator.GE, 'learning rate rank', self.name)
        validator.check_int(len(momentum_shape), 0, validator.GE, 'momentum rank', self.name)
        validator.check_int(len(stat_shape), 0, validator.GE, 'stat rank', self.name)
2760
2761    def check_dtype(self, parameters_dtype, gradient_dtype, learning_rate_dtype,
2762                    accum_dtype, momentum_dtype, stat_dtype):
2763        tuple(map(partial(validator.check_tensor_dtype_valid,
2764                          valid_dtypes=(mstype.float16, mstype.float32), prim_name=self.name),
2765                  ("parameters", "gradient", "learning_rate", "accum", "momentum", "stat"),
2766                  (parameters_dtype, gradient_dtype, learning_rate_dtype, accum_dtype, momentum_dtype, stat_dtype)))
2767
2768
2769class ApplyRMSProp(PrimitiveWithInfer):
2770    r"""
2771    Optimizer that implements the Root Mean Square prop(RMSProp) algorithm.
2772    Please refer to the usage in source code of :class:`mindspore.nn.RMSProp`.
2773
2774    The updating formulas of ApplyRMSProp algorithm are as follows,
2775
2776    .. math::
2777        \begin{array}{ll} \\
2778            s_{t+1} = \rho s_{t} + (1 - \rho)(\nabla Q_{i}(w))^2 \\
2779            m_{t+1} = \beta m_{t} + \frac{\eta} {\sqrt{s_{t+1} + \epsilon}} \nabla Q_{i}(w) \\
2780            w = w - m_{t+1}
2781        \end{array}
2782
2783    where :math:`w` represents `var`, which will be updated.
2784    :math:`s_{t+1}` represents `mean_square`, :math:`s_{t}` is the last moment of :math:`s_{t+1}`,
2785    :math:`m_{t+1}` represents `moment`, :math:`m_{t}` is the last moment of :math:`m_{t+1}`.
2786    :math:`\rho` represents `decay`. :math:`\beta` is the momentum term, represents `momentum`.
2787    :math:`\epsilon` is a smoothing term to avoid division by zero, represents `epsilon`.
2788    :math:`\eta` represents `learning_rate`. :math:`\nabla Q_{i}(w)` represents `grad`.
2789
2790    .. warning::
        Note that in the dense implementation of this algorithm, `mean_square` and `moment` will update even if
        `grad` is 0, but in the sparse implementation, `mean_square` and `moment` will not update
        in iterations during which `grad` is 0.
2794
2795    Args:
2796        use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
2797                            from being updated. Default: ``False`` .
2798
2799    Inputs:
2800        - **var** (Parameter) - Weights to be updated.
2801        - **mean_square** (Tensor) - Mean square gradients, must be the same type as `var`.
2802        - **moment** (Tensor) - Delta of `var`, must be the same type as `var`.
2803        - **learning_rate** (Union[Number, Tensor]) - Learning rate. Must be a float number or
2804          a scalar tensor with float16 or float32 data type.
2805        - **grad** (Tensor) - Gradient, must be the same type as `var`.
2806        - **decay** (float) - Decay rate. Only constant value is allowed.
2807        - **momentum** (float) - Momentum. Only constant value is allowed.
2808        - **epsilon** (float) - Ridge term. Only constant value is allowed.
2809
2810    Outputs:
2811        Tensor, parameters to be updated.
2812
2813    Raises:
2814        TypeError: If `use_locking` is not a bool.
        TypeError: If `var`, `mean_square`, `moment` or `grad` is not a Tensor.
2816        TypeError: If `learning_rate` is neither a Number nor a Tensor.
2817        TypeError: If dtype of `decay`, `momentum` or `epsilon` is not float.
2818        TypeError: If dtype of `learning_rate` is neither float16 nor float32.
2819        ValueError: If `decay`, `momentum` or `epsilon` is not a constant value.
2820
2821    Supported Platforms:
2822        ``Ascend`` ``GPU`` ``CPU``
2823
2824    Examples:
2825        >>> import numpy as np
2826        >>> from mindspore import Tensor, nn, ops, Parameter
2827        >>> class Net(nn.Cell):
2828        ...     def __init__(self):
2829        ...         super(Net, self).__init__()
2830        ...         self.apply_rms_prop = ops.ApplyRMSProp()
2831        ...         self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var")
2832        ...
2833        ...     def construct(self, mean_square, moment, grad, decay, momentum, epsilon, lr):
2834        ...         out = self.apply_rms_prop(self.var, mean_square, moment, lr, grad, decay, momentum, epsilon)
2835        ...         return out
2836        ...
2837        >>> net = Net()
2838        >>> mean_square = Tensor(np.ones([2, 2]).astype(np.float32))
2839        >>> moment = Tensor(np.ones([2, 2]).astype(np.float32))
2840        >>> grad = Tensor(np.ones([2, 2]).astype(np.float32))
2841        >>> output = net(mean_square, moment, grad, 0.0, 1e-10, 0.001, 0.01)
2842        >>> print(net.var.asnumpy())
2843        [[0.990005  0.990005]
2844         [0.990005  0.990005]]
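        >>> # Illustrative NumPy check of the formulas above for the example values
        >>> # (decay=0.0, momentum=1e-10, epsilon=0.001, lr=0.01, all states equal to 1):
        >>> s = 0.0 * 1.0 + (1 - 0.0) * 1.0 ** 2
        >>> m = 1e-10 * 1.0 + 0.01 / np.sqrt(s + 0.001) * 1.0
        >>> print(np.allclose(1.0 - m, net.var.asnumpy(), atol=1e-6))
        True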
2845    """
2846
2847    @prim_attr_register
2848    def __init__(self, use_locking=False):
2849        """Initialize ApplyRMSProp."""
2850        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
2851        self.init_prim_io_names(inputs=['var', 'mean_square', 'moment', 'learning_rate', 'grad',
2852                                        'rho', 'momentum', 'epsilon'], outputs=['output'])
2853        self.add_prim_attr('side_effect_mem', True)
2854
2855
2856class ApplyCenteredRMSProp(Primitive):
2857    r"""
2858    Optimizer that implements the centered RMSProp algorithm.
2859    Please refer to the usage in source code of :class:`mindspore.nn.RMSProp`.
2860
2861    The updating formulas of ApplyCenteredRMSProp algorithm are as follows,
2862
2863    .. math::
2864        \begin{array}{ll} \\
2865            g_{t+1} = \rho g_{t} + (1 - \rho)\nabla Q_{i}(w) \\
2866            s_{t+1} = \rho s_{t} + (1 - \rho)(\nabla Q_{i}(w))^2 \\
2867            m_{t+1} = \beta m_{t} + \frac{\eta} {\sqrt{s_{t+1} - g_{t+1}^2 + \epsilon}} \nabla Q_{i}(w) \\
2868            w = w - m_{t+1}
2869        \end{array}
2870
2871    where :math:`w` represents `var`, which will be updated.
2872    :math:`g_{t+1}` represents `mean_gradient`, :math:`g_{t}` is the last moment of :math:`g_{t+1}`.
2873    :math:`s_{t+1}` represents `mean_square`, :math:`s_{t}` is the last moment of :math:`s_{t+1}`,
2874    :math:`m_{t+1}` represents `moment`, :math:`m_{t}` is the last moment of :math:`m_{t+1}`.
2875    :math:`\rho` represents `decay`. :math:`\beta` is the momentum term, represents `momentum`.
2876    :math:`\epsilon` is a smoothing term to avoid division by zero, represents `epsilon`.
2877    :math:`\eta` represents `learning_rate`. :math:`\nabla Q_{i}(w)` represents `grad`.
2878
2879    Note:
        The difference between `ApplyCenteredRMSProp` and `ApplyRMSProp` is that the former
        uses the centered RMSProp algorithm. The centered RMSProp algorithm uses an estimate of the centered second
        moment (i.e., the variance) for normalization, as opposed to regular RMSProp, which uses the (uncentered)
        second moment. This often helps with training, but is slightly more expensive in terms of computation and
        memory.
2885
2886    .. warning::
2887        In dense implementation of this algorithm, `mean_gradient`, `mean_square`, and `moment` will update
2888        even if the `grad` is zero. But in this sparse implementation, `mean_gradient`, `mean_square`, and `moment`
2889        will not update in iterations during which the `grad` is zero.
2890
2891    Args:
2892        use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
2893                            from being updated. Default: ``False`` .
2894
2895    Inputs:
2896        - **var** (Parameter) - Weights to be updated.
2897        - **mean_gradient** (Tensor) - Mean gradients, must be the same type as `var`.
2898        - **mean_square** (Tensor) - Mean square gradients, must be the same type as `var`.
2899        - **moment** (Tensor) - Delta of `var`, must be the same type as `var`.
2900        - **grad** (Tensor) - Gradient, must be the same type as `var`.
2901        - **learning_rate** (Union[Number, Tensor]) - Learning rate. Must be a float number or
2902          a scalar tensor with float16 or float32 data type.
2903        - **decay** (float) - Decay rate.
2904        - **momentum** (float) - Momentum.
2905        - **epsilon** (float) - Ridge term.
2906
2907    Outputs:
2908        Tensor, parameters to be updated.
2909
2910    Raises:
2911        TypeError: If `use_locking` is not a bool.
2912        TypeError: If `var`, `mean_gradient`, `mean_square`, `moment` or `grad` is not a Tensor.
        TypeError: If `learning_rate` is neither a Number nor a Tensor.
        TypeError: If dtype of `learning_rate` is neither float16 nor float32.
2915        TypeError: If `decay`, `momentum` or `epsilon` is not a float.
2916
2917    Supported Platforms:
2918        ``Ascend`` ``GPU`` ``CPU``
2919
2920    Examples:
2921        >>> import numpy as np
2922        >>> from mindspore import Tensor, nn, ops, Parameter
2923        >>> class Net(nn.Cell):
2924        ...     def __init__(self):
2925        ...         super(Net, self).__init__()
        ...         self.apply_centered_rms_prop = ops.ApplyCenteredRMSProp()
        ...         self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var")
        ...
        ...     def construct(self, mean_grad, mean_square, moment, grad, decay, momentum, epsilon, lr):
        ...         out = self.apply_centered_rms_prop(self.var, mean_grad, mean_square, moment, grad,
        ...                                            lr, decay, momentum, epsilon)
2932        ...         return out
2933        ...
2934        >>> net = Net()
2935        >>> mean_grad = Tensor(np.ones([2, 2]).astype(np.float32))
2936        >>> mean_square = Tensor(np.ones([2, 2]).astype(np.float32))
2937        >>> moment = Tensor(np.ones([2, 2]).astype(np.float32))
2938        >>> grad = Tensor(np.ones([2, 2]).astype(np.float32))
2939        >>> output = net(mean_grad, mean_square, moment, grad, 0.0, 1e-10, 0.001, 0.01)
2940        >>> print(net.var.asnumpy())
2941        [[0.68377227  0.68377227]
2942         [0.68377227  0.68377227]]
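        >>> # Illustrative NumPy check of the centered update for the example values
        >>> # (decay=0.0, momentum=1e-10, epsilon=0.001, lr=0.01): the centered variance
        >>> # s - g^2 vanishes here, so the step is about 0.01 / sqrt(0.001).
        >>> step = 1e-10 * 1.0 + 0.01 / np.sqrt(1.0 - 1.0 ** 2 + 0.001)
        >>> print(np.allclose(1.0 - step, net.var.asnumpy(), atol=1e-6))
        True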
2943    """
2944
2945    @prim_attr_register
2946    def __init__(self, use_locking=False):
2947        """Initialize ApplyCenteredRMSProp."""
2948        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
2949        self.add_prim_attr('side_effect_mem', True)
2950
2951
2952class L2Normalize(Primitive):
2953    r"""
2954    L2 Normalization Operator.
2955
2956    This operator will normalize the input using the given axis. The function is shown as follows:
2957
2958    .. math::
2959        \displaylines{{\text{output} = \frac{x}{\sqrt{\text{max}( \sum_{i}^{}\left | x_i  \right | ^2, \epsilon)}}}}
2960
    where :math:`\epsilon` is `epsilon` and :math:`\sum_{i}^{}\left | x_i  \right | ^2` is the sum of squares of
    the input `x` along the dimension `axis`.
2963
2964    Note:
2965        On Ascend, input data type of float64 is currently not supported.
2966
2967    Args:
2968        axis (Union[list(int), tuple(int), int], optional): Specify the axis for calculating the L2 norm.
2969            Default: ``0`` .
2970        epsilon (float, optional): A small value added for numerical stability. Default: ``1e-4`` .
2971
2972    Inputs:
2973        - **x** (Tensor) - Input to compute the normalization. Tensor of shape :math:`(N, *)`,
2974          where :math:`*` means any number of additional dimensions.
2975          Data type must be float16, float32 or float64.
2976
2977    Outputs:
2978        Tensor, with the same type and shape as the `x`.
2979
2980    Raises:
2981        TypeError: If `axis` is not one of the following: list, tuple or int.
2982        TypeError: If `epsilon` is not a float.
2983        TypeError: If `x` is not a Tensor.
2984        TypeError: If dtype of `x` is not in [float16, float32, float64].
2985        ValueError: If dimension of `x` is not greater than 0.
2986
2987    Supported Platforms:
2988        ``Ascend`` ``GPU`` ``CPU``
2989
2990    Examples:
2991        >>> import mindspore
2992        >>> import numpy as np
2993        >>> from mindspore import Tensor, ops
2994        >>> l2_normalize = ops.L2Normalize()
2995        >>> x = Tensor(np.random.randint(-256, 256, (2, 3, 4)), mindspore.float32)
2996        >>> output = l2_normalize(x)
2997        >>> print(output.shape)
2998        (2, 3, 4)
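        >>> # Illustrative NumPy cross-check of the formula above (axis=0, epsilon=1e-4):
        >>> xn = x.asnumpy()
        >>> denom = np.sqrt(np.maximum((xn ** 2).sum(axis=0, keepdims=True), 1e-4))
        >>> print(np.allclose(output.asnumpy(), xn / denom, atol=1e-5))
        True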
2999    """
3000
3001    @prim_attr_register
3002    def __init__(self, axis=0, epsilon=1e-4):
3003        """Initialize L2Normalize."""
3004        axis = [axis] if isinstance(axis, int) else axis
3005        validator.check_value_type('axis', axis, [list, tuple], self.name)
3006        validator.check_value_type('epsilon', epsilon, [int, float], self.name)
3007        self.add_prim_attr('axis', axis)
3008        self.init_attrs['axis'] = axis
3009        if len(axis) != 1:
            raise TypeError(f"For '{self.name}', the length of 'axis' must be 1, but got {len(axis)}. "
                            f"Multiple axes will be supported later.")
3012        self.axis = axis
3013
3014
3015class GetNext(Primitive):
3016    """
3017    Returns the next element in the dataset queue.
3018
3019    Note:
        The GetNext operation needs to be associated with a network and also depends
        on the 'dataset' interface; for example, please refer to :class:`mindspore.dataset.MnistDataset`.
        It can't be used directly as a single operation.
        For details, please refer to the source code of :class:`mindspore.connect_network_with_dataset`.
3024
3025    Args:
3026        types (list[:class:`mindspore.dtype`]): The type of the outputs.
3027        shapes (list[tuple[int]]): The dimensionality of the outputs.
3028        output_num (int): The output number, length of `types` and `shapes`.
3029        shared_name (str): Queue name to fetch the data.
3030
3031    Inputs:
3032        No inputs.
3033
3034    Outputs:
3035        tuple[Tensor], the output of dataset. The shape is described in `shapes`
3036        and the type is described in `types`.
3037
3038    Supported Platforms:
3039        ``Ascend`` ``GPU``
3040
3041    Examples:
3042        >>> import mindspore
3043        >>> from mindspore import ops
3044        >>> from mindspore import dataset as ds
3045        >>> from mindspore import dtype as mstype
3046        >>> data_path = "/path/to/MNIST_Data/train/"
3047        >>> train_dataset = ds.MnistDataset(data_path, num_samples=10)
3048        >>> dataset_helper = mindspore.DatasetHelper(train_dataset, dataset_sink_mode=True)
3049        >>> dataset = dataset_helper.iter.dataset
3050        >>> dataset_types, dataset_shapes = dataset_helper.types_shapes()
3051        >>> queue_name = dataset.__transfer_dataset__.queue_name
3052        >>> get_next = ops.GetNext(dataset_types, dataset_shapes, len(dataset_types), queue_name)
3053        >>> data, label = get_next()
3054        >>> relu = ops.ReLU()
3055        >>> result = relu(data.astype(mstype.float32))
3056        >>> print(result.shape)
3057        (28, 28, 1)
3058    """
3059
3060    @prim_attr_register
3061    def __init__(self, types, shapes, output_num, shared_name):
3062        """Initialize GetNext."""
3063        validator.check_value_type("types", types, [list, tuple], self.name)
3064        validator.check_value_type("shapes", shapes, [list, tuple], self.name)
3065        validator.check("types length", len(types), "shapes length", len(shapes), validator.EQ, self.name)
3066        validator.check_value_type("output_num", output_num, [int], self.name)
3067
3068
3069class LSTM(Primitive):
3070    r"""
3071    Performs the Long Short-Term Memory (LSTM) on the input.
3072
3073    For more information, please refer to :class:`mindspore.nn.LSTM`.
3074
3075    Args:
3076        input_size (int): Number of features of input.
3077        hidden_size (int):  Number of features of hidden layer.
3078        num_layers (int): Number of layers of stacked LSTM.
3079        has_bias (bool): Whether the cell has bias `b_ih` and `b_hh`.
3080        bidirectional (bool): Specifies whether it is a bidirectional LSTM.
3081        dropout (float): If not 0, append `Dropout` layer on the outputs of each
3082            LSTM layer except the last layer. The range of dropout is [0.0, 1.0].
3083        proj_size (int): If `proj_size` > 0, a projection of the corresponding size will be used,
3084            which is only supported on CPU now. Default: ``0`` .
3085
3086    Inputs:
3087        - **input** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, input\_size)` or
3088          :math:`(batch\_size, seq\_len, input\_size)`.
3089        - **h** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, real\_hidden\_size)`.
3090        - **c** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
3091        - **w** (Tensor) - A weight Tensor.
3092
3093        If :math:`proj\_size > 0` , :math:`real\_hidden\_size = proj\_size` , otherwise
3094        :math:`real\_hidden\_size = hidden\_size` .
3095
3096    Outputs:
3097        Tuple, a tuple contains `(output, h_n, c_n, reserve, state)`.
3098
3099        - **output** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * real\_hidden\_size)`.
3100        - **h_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, real\_hidden\_size)`.
3101        - **c_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
3102        - **reserve** (Tensor) - Tensor of shape :math:`(r, 1)`.
3103        - **state** (Tensor) - Random number generator state and its shape is :math:`(s, 1)`.
3104
3105    Raises:
3106        TypeError: If `input_size`, `hidden_size` or `num_layers` is not an int.
3107        TypeError: If `has_bias` or `bidirectional` is not a bool.
3108        TypeError: If `dropout` is not a float.
3109        ValueError: If `dropout` is not in range [0.0, 1.0].
3110        ValueError: If `proj_size` is not in range [0, `hidden_size`).
3111
3112    Supported Platforms:
3113        ``GPU`` ``CPU``
3114
3115    Examples:
3116        >>> import numpy as np
3117        >>> from mindspore import Tensor, ops
3118        >>> input_size = 10
3119        >>> hidden_size = 2
3120        >>> num_layers = 1
3121        >>> seq_len = 5
3122        >>> batch_size = 2
3123        >>>
3124        >>> net = ops.LSTM(input_size, hidden_size, num_layers, True, False, 0.0)
3125        >>> input_tensor = Tensor(np.ones([seq_len, batch_size, input_size]).astype(np.float32))
3126        >>> h0 = Tensor(np.ones([num_layers, batch_size, hidden_size]).astype(np.float32))
3127        >>> c0 = Tensor(np.ones([num_layers, batch_size, hidden_size]).astype(np.float32))
3128        >>> w = Tensor(np.ones([112, 1, 1]).astype(np.float32))
3129        >>> output, hn, cn, _, _ = net(input_tensor, h0, c0, w)
3130        >>> print(output)
3131        [[[0.9640267  0.9640267 ]
3132          [0.9640267  0.9640267 ]]
3133         [[0.9950539  0.9950539 ]
3134          [0.9950539  0.9950539 ]]
3135         [[0.99932843 0.99932843]
3136          [0.99932843 0.99932843]]
3137         [[0.9999084  0.9999084 ]
3138          [0.9999084  0.9999084 ]]
3139         [[0.9999869  0.9999869 ]
3140          [0.9999869  0.9999869 ]]]
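        >>> # Illustrative size check for the packed weight above, assuming the flat layout
        >>> # [W_ih, W_hh, b_ih, b_hh] for a single unidirectional layer with bias:
        >>> print(4 * hidden_size * (input_size + hidden_size) + 2 * 4 * hidden_size)
        112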
3141    """
3142
3143    @prim_attr_register
3144    def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout, proj_size=0):
3145        """Initialize LSTM."""
3146        self.input_size = validator.check_positive_int(input_size, "input_size", self.name)
3147        self.hidden_size = validator.check_positive_int(hidden_size, "hidden_size", self.name)
3148        self.proj_size = validator.check_int_range(proj_size, 0, hidden_size, validator.INC_LEFT,
3149                                                   'proj_size', self.name)
3150        self.num_layers = validator.check_positive_int(num_layers, "num_layers", self.name)
3151        self.has_bias = validator.check_value_type("has_bias", has_bias, (bool,), self.name)
3152        self.bidirectional = validator.check_value_type("bidirectional", bidirectional, (bool,), self.name)
3153        self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
3154        self.dropout = validator.check_float_range(dropout, 0, 1, validator.INC_BOTH, 'dropout', self.name)
3157
3158        if bidirectional:
3159            self.num_directions = 2
3160        else:
3161            self.num_directions = 1
3162
3163    def infer_shape(self, x_shape, h_shape, c_shape, w_shape):
3164        validator.check_equal_int(len(x_shape), 3, "x rank", self.name)
3165        validator.check_equal_int(x_shape[2], self.input_size, "x[2]", self.name)
3166
3167        # h and c should be same shape
3168        validator.check_equal_int(len(h_shape), 3, "h rank", self.name)
3169        if self.proj_size == 0:
            validator.check("h_shape", h_shape, "c_shape", c_shape, validator.EQ, self.name)

        real_hidden_size = self.proj_size if self.proj_size > 0 else self.hidden_size
        validator.check_int(h_shape[0], self.num_layers * self.num_directions, validator.EQ, "h[0]", self.name)
        validator.check_equal_int(h_shape[1], x_shape[1], "h[1]", self.name)
        validator.check_int(h_shape[2], real_hidden_size, validator.EQ, "h[2]", self.name)
3176
3177        y_shape = (x_shape[0], x_shape[1], real_hidden_size * self.num_directions)
3178
3179        # set arbitrary shape for reserved space
3180        reserved_shape = (1, 1)
3181        state_shape = (1, 1)
3182        return y_shape, h_shape, c_shape, reserved_shape, state_shape
3183
3184    def infer_dtype(self, x_dtype, h_dtype, c_dtype, w_dtype):
3185        args = {'x': x_dtype, 'h': h_dtype, 'c': c_dtype, 'w': w_dtype}
3186        validator.check_tensors_dtypes_same_and_valid(args, (mstype.float32, mstype.float16), self.name)
3187        return x_dtype, x_dtype, x_dtype, x_dtype, x_dtype
3188
3189
3190class SigmoidCrossEntropyWithLogits(Primitive):
3191    r"""
3192    Uses the given logits to compute sigmoid cross entropy between the logits and the label.
3193
    Measures the distribution error, using cross-entropy loss, in discrete classification tasks where each class
    is independent and not mutually exclusive.
3196
3197    Sets input logits as :math:`X`, input label as :math:`Y`, output as :math:`loss`. Then,
3198
3199    .. math::
3200
3201        \begin{array}{ll} \\
3202            p_{ij} = sigmoid(X_{ij}) = \frac{1}{1 + e^{-X_{ij}}} \\
3203            loss_{ij} = -[Y_{ij} * ln(p_{ij}) + (1 - Y_{ij})ln(1 - p_{ij})]
3204        \end{array}
3205
3206    Inputs:
3207        - **logits** (Tensor) - Input logits. Tensor of shape :math:`(N, *)` where :math:`*` means any number
3208          of additional dimensions.
3209        - **label** (Tensor) - Ground truth label. With the same shape and type as `logits`.
3210
3211    Outputs:
3212        Tensor, with the same shape and type as input `logits`.
3213
3214    Raises:
3215        TypeError: If `logits` or `label` is not a Tensor.
3216
3217    Supported Platforms:
3218        ``Ascend`` ``GPU`` ``CPU``
3219
3220    Examples:
3221        >>> import numpy as np
3222        >>> from mindspore import Tensor, ops
3223        >>> logits = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]).astype(np.float32))
3224        >>> labels = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]).astype(np.float32))
3225        >>> sigmoid = ops.SigmoidCrossEntropyWithLogits()
3226        >>> output = sigmoid(logits, labels)
3227        >>> print(output)
3228        [[ 0.6111007   0.5032824   0.26318604]
3229         [ 0.58439666  0.5530153  -0.4368139 ]]
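        >>> # Illustrative NumPy cross-check of the element-wise formula above (a sketch):
        >>> p = 1.0 / (1.0 + np.exp(-logits.asnumpy()))
        >>> y = labels.asnumpy()
        >>> ref = -(y * np.log(p) + (1 - y) * np.log(1 - p))
        >>> print(np.allclose(ref, output.asnumpy(), atol=1e-5))
        True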
3230    """
3231
3232    @prim_attr_register
3233    def __init__(self):
3234        """Initialize SigmoidCrossEntropyWithLogits"""
3235        self.init_prim_io_names(inputs=['predict', 'target'], outputs=['loss'])
3236
3237
3238class Pad(Primitive):
3239    r"""
3240    Pads the input tensor according to the paddings.
3241
3242    Refer to :func:`mindspore.ops.pad` for more details. Use :func:`mindspore.ops.pad` instead if `paddings` has
3243    negative values.
3244
3245    Args:
        paddings (tuple): The shape of parameter `paddings` is (N, 2). N is the rank of input data. All elements of
            `paddings` are int type. For the `D` th dimension of the input, paddings[D, 0] indicates how many elements
            to pad before the input tensor in the `D` th dimension, and paddings[D, 1] indicates how many elements to
            pad after the input tensor in the `D` th dimension.
3250
3251    Inputs:
3252        - **input_x** (Tensor) - Tensor to be padded. It has shape :math:`(N, *)`, where :math:`*` means
3253          any number of additional dimensions.
3254
3255    Outputs:
3256        Tensor, the tensor after padding.
3257
3258    Raises:
3259        TypeError: If `paddings` is not a tuple.
3260        TypeError: If `input_x` is not a Tensor.
3261        ValueError: If shape of `paddings` is not :math:`(N, 2)`.
        ValueError: If the size of `paddings` is not equal to 2 times the rank of `input_x`.
3263
3264    Supported Platforms:
3265        ``Ascend`` ``GPU`` ``CPU``
3266
3267    Examples:
3268        >>> import mindspore
3269        >>> import numpy as np
3270        >>> from mindspore import Tensor, ops
3271        >>> input_x = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32)
3272        >>> pad_op = ops.Pad(((1, 2), (2, 1)))
3273        >>> output = pad_op(input_x)
3274        >>> print(output)
3275        [[ 0.   0.   0.   0.   0.   0. ]
3276         [ 0.   0.  -0.1  0.3  3.6  0. ]
3277         [ 0.   0.   0.4  0.5 -3.2  0. ]
3278         [ 0.   0.   0.   0.   0.   0. ]
3279         [ 0.   0.   0.   0.   0.   0. ]]
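        >>> # Illustrative cross-check with numpy.pad using the same paddings:
        >>> ref = np.pad(input_x.asnumpy(), ((1, 2), (2, 1)), mode='constant')
        >>> print(np.allclose(ref, output.asnumpy()))
        True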
3280    """
3281
3282    @prim_attr_register
3283    def __init__(self, paddings):
3284        """Initialize Pad"""
3285        self.init_prim_io_names(inputs=['x'], outputs=['y'])
3286        validator.check_value_type("paddings", paddings, [tuple], self.name)
3287        self.paddings = paddings
3288
3289
3290class PadV3(Primitive):
3291    """
3292    Pads the input Tensor according to the `paddings`, `mode` and `paddings_contiguous`.
3293
3294    Args:
3295        mode (str, optional): An optional string indicates padding mode,
3296            support ``"constant"`` , ``"reflect"`` , ``"edge"`` , ``"circular"`` . Default: ``"constant"`` .
3297            The effects of various padding modes are as follows:
3298
3299            - ``"constant"``: Pads the input Tensor with value specified by `constant_value`.
3300            - ``"reflect"``: Pads the input Tensor by reflecting the values of the pixels at the
3301              boundary of the Tensor.
3302            - ``"edge"``: Pads the input Tensor with the values of the pixels on the border of the Tensor.
3303            - ``"circular"``: Circular padding mode. In this mode, the pixels from one edge of the image
3304              are wrapped around to the opposite edge, such that the pixel on the right edge of the
3305              image is replaced with the pixel on the left edge, and the pixel on the bottom edge
3306              is replaced with the pixel on the top edge.
3307
        paddings_contiguous (bool, optional): An optional bool value that indicates whether `paddings` is contiguous.
            If ``True`` , `paddings` is arranged as [begin0, end0, begin1, end1, ...].
            If ``False`` , `paddings` is arranged as [begin0, begin1, ..., end0, end1, ...].
            Default: ``True`` .
3312
3313    Inputs:
3314        - **x** (Tensor) - Tensor to be padded. It has shape :math:`(N, *)`, where :math:`*` means
3315          any number of additional dimensions.
        - **paddings** (Tensor) - Specifies the amount of padding to add before and after each
          dimension of the input Tensor `x`. It's a 1D Tensor of type int32 or int64.
        - **constant_value** (Tensor, optional) - Padding value to use in 'constant' mode.
          If not specified, 0 is used instead. It has the same type as `x`.
3320
3321    Outputs:
3322        Tensor, the tensor after padding.
3323
3324    Raises:
3325        TypeError: If `x` or `paddings` is not a Tensor.
3326        TypeError: If `padding_contiguous` is not a bool.
3327        ValueError: If `mode` is not a str or not in support modes.
        ValueError: If `mode` is "constant" and the number of elements of `paddings` is not even.
        ValueError: If `mode` is "constant" and the number of elements of `paddings` is larger than the
            input dimension times 2.
        ValueError: If `mode` is "edge", "reflect" or "circular" and the number of elements of `paddings`
            is not 2, 4 or 6.
        ValueError: If `mode` is "edge", "reflect" or "circular", `x` has 3 dimensions and the number of
            elements of `paddings` is not 2.
        ValueError: If `mode` is "edge", "reflect" or "circular", `x` has 4 dimensions and the number of
            elements of `paddings` is not 4.
        ValueError: If `mode` is "circular", `x` has 5 dimensions and the number of elements of `paddings` is not 6.
        ValueError: If `mode` is "edge", "reflect" or "circular" and `x` has fewer than 3 dimensions.
        ValueError: If `mode` is "edge" or "circular" and `x` has more than 5 dimensions.
        ValueError: If `mode` is "reflect" and `x` has more than 4 dimensions.
        ValueError: If `mode` is "reflect" and the padding size is larger than the corresponding dimension of `x`.
        ValueError: If, after padding, any dimension of the output is not greater than 0.
3341
3342    Supported Platforms:
3343        ``Ascend`` ``GPU`` ``CPU``
3344
3345    Examples:
        >>> from mindspore import Tensor, nn, ops
        >>> # case1: mode="reflect", paddings_contiguous=True
3347        >>> class Net(nn.Cell):
3348        ...    def __init__(self, mode, paddings_contiguous):
3349        ...        super(Net, self).__init__()
3350        ...        self.pad = ops.PadV3(mode=mode, paddings_contiguous=paddings_contiguous)
3351        ...        self.paddings = Tensor([1, 1])
3352        ...    def construct(self, x):
3353        ...        return self.pad(x, self.paddings)
3354        ...
3355        >>> x = Tensor([[[0., 1.]]])
3356        >>> pad = Net(mode="reflect", paddings_contiguous=True)
3357        >>> output = pad(x)
3358        >>> print(output)
3359        [[[1. 0. 1. 0.]]]
        >>> # case2: mode="constant", paddings_contiguous=False
3361        >>> class Net(nn.Cell):
3362        ...    def __init__(self, mode, paddings_contiguous):
3363        ...        super(Net, self).__init__()
3364        ...        self.pad = ops.PadV3(mode=mode, paddings_contiguous=paddings_contiguous)
3365        ...        self.paddings = Tensor([1, 0, 1, 0])
3366        ...        self.value = Tensor(1.5)
3367        ...    def construct(self, x):
3368        ...        return self.pad(x, self.paddings, self.value)
3369        ...
3370        >>> x = Tensor([[0., 1., 2.]])
3371        >>> pad = Net(mode="constant", paddings_contiguous=False)
3372        >>> output = pad(x)
3373        >>> print(output)
3374        [[1.5 0. 1. 2. 1.5]]
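        >>> # Editorial note (a sketch, not part of the original example): the four modes mirror
        >>> # NumPy's np.pad modes 'constant', 'reflect', 'edge' and 'wrap' ('wrap' corresponds to
        >>> # "circular"); e.g. circular padding of a 1-D sequence by 1 on each side looks like:
        >>> import numpy as np
        >>> np.pad(np.array([0., 1., 2.]), (1, 1), mode='wrap')
        array([2., 0., 1., 2., 0.])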
3375    """
3376
3377    @prim_attr_register
3378    def __init__(self, mode='constant', paddings_contiguous=True):
3379        """Initialize PadV3"""
3380        self.init_prim_io_names(inputs=['x', 'paddings', 'constant_value'], outputs=['y'])
3381        validator.check_string(mode, ['constant', 'reflect', 'edge', 'circular'], 'mode', self.name)
3382        validator.check_bool(paddings_contiguous, "paddings_contiguous", self.name)
3383        self.mode = mode
3384        self.paddings_contiguous = paddings_contiguous
3385
3386
3387class MirrorPad(Primitive):
3388    """
3389    Pads the input tensor according to the paddings and mode.
3390
3391    Args:
3392        mode (str, optional): An optional string specifying the pad method.
3393            The optional values are ``'REFLECT'`` and ``'SYMMETRIC'`` .
3394            Default: ``'REFLECT'`` .
3395
            - ``'REFLECT'``: Reflects the values across the edge, excluding the edge value itself.
              For example, padding [1, 2, 3, 4] with 2 elements on both sides results in [3, 2, 1, 2, 3, 4, 3, 2].
            - ``'SYMMETRIC'``: Reflects the values across the edge, repeating the edge value as well.
              For example, padding [1, 2, 3, 4] with 2 elements on both sides results in [2, 1, 1, 2, 3, 4, 4, 3].
3400
3401    Inputs:
3402        - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
3403          additional dimensions.
        - **paddings** (Tensor) - `paddings` must be a constant tensor. Its value is a
          matrix of shape :math:`(N, 2)`, where N is the rank of `input_x`, and all of its elements
          are of int type. For the `D` th dimension of the input, paddings[D, 0] indicates how many
          elements to pad before the input tensor in the `D` th dimension, and paddings[D, 1]
          indicates how many elements to pad after it. Both
          paddings[D, 0] and paddings[D, 1] must be no greater than input_x.dim_size(D)
          (or input_x.dim_size(D) - 1) if mode is SYMMETRIC (or REFLECT, respectively).
3411
3412    Outputs:
3413        Tensor, the tensor after padding.
3414
        - If `mode` is ``'REFLECT'``, the padded values are filled by mirroring the input across the edge,
          excluding the edge itself. If `input_x` is [[1,2,3], [4,5,6], [7,8,9]] and `paddings` is
          [[1,1], [2,2]], then the output is [[6,5,4,5,6,5,4], [3,2,1,2,3,2,1], [6,5,4,5,6,5,4],
          [9,8,7,8,9,8,7], [6,5,4,5,6,5,4]].
          For a more intuitive understanding, please see the example below.
        - If `mode` is ``'SYMMETRIC'``, the filling method is similar to ``'REFLECT'``; the input is also
          mirrored across the edge, except that the edge itself is included. If `input_x`
          is [[1,2,3], [4,5,6], [7,8,9]] and `paddings` is [[1,1], [2,2]], then the output is
          [[2,1,1,2,3,3,2], [2,1,1,2,3,3,2], [5,4,4,5,6,6,5], [8,7,7,8,9,9,8], [8,7,7,8,9,9,8]].
          For a more intuitive understanding, please see the example below.
3424
3425    Raises:
3426        TypeError: If `input_x` or `paddings` is not a Tensor.
3427        TypeError: If `mode` is not a str.
3428        ValueError: If paddings.size is not equal to 2 * rank of input_x.
3429
3430    Supported Platforms:
3431        ``Ascend`` ``GPU`` ``CPU``
3432
3433    Examples:
3434        >>> from mindspore import Tensor, nn, ops
3435        >>> # case1: mode="REFLECT"
3436        >>> class Net(nn.Cell):
3437        ...    def __init__(self, mode):
3438        ...        super(Net, self).__init__()
3439        ...        self.pad = ops.MirrorPad(mode=mode)
3440        ...        self.paddings = Tensor([[1, 1], [2, 2]])
3441        ...    def construct(self, input_x):
3442        ...        return self.pad(input_x, self.paddings)
3443        ...
3444        >>> input_x = Tensor([[1,2,3], [4,5,6], [7,8,9]])
3445        >>> pad = Net("REFLECT")
3446        >>> output = pad(input_x)
3447        >>> print(output)
3448        [[6 5 4 5 6 5 4]
3449         [3 2 1 2 3 2 1]
3450         [6 5 4 5 6 5 4]
3451         [9 8 7 8 9 8 7]
3452         [6 5 4 5 6 5 4]]
3453        >>> # case2: mode="SYMMETRIC"
3454        >>> pad = Net("SYMMETRIC")
3455        >>> output = pad(input_x)
3456        >>> print(output)
3457        [[2 1 1 2 3 3 2]
3458         [2 1 1 2 3 3 2]
3459         [5 4 4 5 6 6 5]
3460         [8 7 7 8 9 9 8]
3461         [8 7 7 8 9 9 8]]
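        >>> # Editorial cross-check (a sketch, not part of the original example): NumPy's np.pad
        >>> # follows the same 'reflect'/'symmetric' convention, e.g. for the 1-D case in the notes above:
        >>> import numpy as np
        >>> np.pad(np.array([1, 2, 3, 4]), 2, mode='symmetric')
        array([2, 1, 1, 2, 3, 4, 4, 3])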
3462    """
3463
3464    @prim_attr_register
3465    def __init__(self, mode='REFLECT'):
3466        """Initialize Pad"""
3467        self.init_prim_io_names(inputs=['x', 'paddings'], outputs=['y'])
3468        validator.check_string(mode, ['REFLECT', 'SYMMETRIC'], 'mode', self.name)
3469        self.mode = mode
3470
3471
3472class ComputeAccidentalHits(Primitive):
3473    r"""
3474    Compute accidental hits of sampled classes which match target classes.
3475
    When a target class matches a sampled class, we call it an "accidental hit".
    The result of computing accidental hits contains three parts (index, id, weight),
    where index represents the row number in `true_classes`, id represents the position in `sampled_candidates`,
    and weight is -FLOAT_MAX, the negative of the maximum value of the float type (see the example below).
3480
3481    Args:
3482        num_true (int): The number of target classes per training example. Default: ``1`` .
3483
3484    Inputs:
3485        - **true_classes** (Tensor) - The target classes. With data type of int64
3486          and shape :math:`(batch\_size, num\_true)`.
3487        - **sampled_candidates** (Tensor) - The Candidate sampling results of operators, types of training samples,
3488          with data type of int64 and shape :math:`(num\_sampled, )`.
3489
3490    Outputs:
3491        Tuple of 3 Tensors.
3492
3493        - **indices** (Tensor) - A Tensor with shape :math:`(num\_accidental\_hits, )`,
3494          with data type of int32.
3495        - **ids** (Tensor) - A Tensor with shape :math:`(num\_accidental\_hits, )`,
3496          with data type of int64.
3497        - **weights** (Tensor) - A Tensor with shape :math:`(num\_accidental\_hits, )`, with the type float32.
3498
3499    Raises:
3500        TypeError: If dtype of `num_true` is not int.
3501        TypeError: If `true_classes` or `sampled_candidates` is not a Tensor.
3502        TypeError: If dtype of `true_classes` or `sampled_candidates` is neither int32 nor int64.
3503
3504    Supported Platforms:
3505        ``Ascend``
3506
3507    Examples:
3508        >>> import numpy as np
3509        >>> from mindspore import Tensor, ops
3510        >>> true_classes = np.array([[1, 2], [0, 4], [3, 3]])
3511        >>> sampled_candidates = np.array([0, 1, 2, 3, 4])
3512        >>> sampler = ops.ComputeAccidentalHits(2)
3513        >>> indices, ids, weights = sampler(Tensor(true_classes), Tensor(sampled_candidates))
3514        >>> print(indices, ids, weights)
3515        [0 0 1 1 2 2]
3516        [1 2 0 4 3 3]
3517        [-3.4028235e+38 -3.4028235e+38 -3.4028235e+38 -3.4028235e+38 -3.4028235e+38 -3.4028235e+38]
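        >>> # Editorial sketch (not part of the original example): the hits above can be reproduced in
        >>> # plain Python by pairing every entry of true_classes with the candidates it appears in;
        >>> # note that the duplicated 3 in row 2 produces two hits, matching the output above.
        >>> [(row, int(t)) for row, classes in enumerate(true_classes) for t in classes if t in sampled_candidates]
        [(0, 1), (0, 2), (1, 0), (1, 4), (2, 3), (2, 3)]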
3518
3519    """
3520
3521    @prim_attr_register
3522    def __init__(self, num_true=1):
3523        """Initialize ComputeAccidentalHits"""
3524        self.init_prim_io_names(inputs=['true_classes', 'sampled_candidates'],
3525                                outputs=['indices', 'ids', 'weights'])
3526        validator.check_value_type("num_true", num_true, [int], self.name)
3527        validator.check_number("num_true", num_true, 1, validator.GE, self.name)
3528        self.num_true = num_true
3529
3530
3531class ROIAlign(Primitive):
3532    r"""
3533    Computes the Region of Interest (RoI) Align operator.
3534
3535    The operator computes the value of each sampling point by bilinear interpolation from the nearby grid points on the
3536    feature map. No quantization is performed on any coordinates involved in the RoI, its bins, or the sampling
3537    points. The details of (RoI) Align operator are described in `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_.
3538
3539    Args:
3540        pooled_height (int): The output features height.
3541        pooled_width (int): The output features width.
3542        spatial_scale (float): A scaling factor that maps the raw image coordinates to the input
3543            feature map coordinates. Suppose the height of a RoI is `ori_h` in the raw image and `fea_h` in the
3544            input feature map, the `spatial_scale` must be `fea_h / ori_h`.
3545        sample_num (int): Number of sampling points. Default: ``2`` .
3546        roi_end_mode (int): Number must be 0 or 1. If roi_end_mode=0, use the legacy implementation.
3547            If roi_end_mode=1, end pixel of the roi_box will be shifted by +1*spatial_scale. Default: ``1`` .
3548
3549
3550    Inputs:
3551        - **features** (Tensor) - The input features, whose shape must be :math:`(N, C, H, W)`, with data type of
3552          float16 or float32.
3553        - **rois** (Tensor) - The shape is :math:`(rois\_n, 5)`, with data type of float16 or float32.
          `rois_n` represents the number of RoIs. The size of the second dimension must be `5` and the `5` columns
3555          are :math:`(image\_index, top\_left\_x, top\_left\_y, bottom\_right\_x, bottom\_right\_y)`.
3556          `image_index` represents the index of image. `top_left_x` and `top_left_y` represent the `x, y`
3557          coordinates of the top left corner of corresponding RoI, respectively. `bottom_right_x` and `bottom_right_y`
3558          represent the `x, y` coordinates of the bottom right corner of corresponding RoI, respectively.
3559
3560    Outputs:
3561        Tensor, the shape is :math:`(rois\_n, C, pooled\_height, pooled\_width)`.
3562
3563    Raises:
3564        TypeError: If `pooled_height`, `pooled_width`, `sample_num` or `roi_end_mode` is not an int.
3565        TypeError: If `spatial_scale` is not a float.
3566        TypeError: If `features` or `rois` is not a Tensor.
3567
3568    Supported Platforms:
3569        ``Ascend`` ``GPU`` ``CPU``
3570
3571    Examples:
3572        >>> import mindspore
3573        >>> import numpy as np
3574        >>> from mindspore import Tensor, ops
3575        >>> features = Tensor(np.array([[[[1., 2.], [3., 4.]]]]), mindspore.float32)
3576        >>> rois = Tensor(np.array([[0, 0.2, 0.3, 0.2, 0.3]]), mindspore.float32)
3577        >>> roi_align = ops.ROIAlign(2, 2, 0.5, 2)
3578        >>> output = roi_align(features, rois)
3579        >>> print(output)
3580        [[[[1.775 2.025]
3581           [2.275 2.525]]]]
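        >>> # Editorial note (illustrative, hypothetical sizes not from the original example):
        >>> # spatial_scale is the ratio between the feature-map size and the raw-image size,
        >>> # e.g. a 1024-pixel-high image mapped to a 512-pixel-high feature map gives:
        >>> fea_h, ori_h = 512.0, 1024.0
        >>> fea_h / ori_h
        0.5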
3582    """
3583
3584    @prim_attr_register
3585    def __init__(self, pooled_height, pooled_width, spatial_scale, sample_num=2, roi_end_mode=1):
3586        """Initialize ROIAlign"""
3587        validator.check_value_type("pooled_height", pooled_height, [int], self.name)
3588        validator.check_value_type("pooled_width", pooled_width, [int], self.name)
3589        validator.check_value_type("spatial_scale", spatial_scale, [float], self.name)
3590        validator.check_value_type("sample_num", sample_num, [int], self.name)
3591        validator.check_value_type("roi_end_mode", roi_end_mode, [int], self.name)
3592        validator.check_int_range(roi_end_mode, 0, 1, validator.INC_BOTH, "roi_end_mode", self.name)
3593        self.pooled_height = pooled_height
3594        self.pooled_width = pooled_width
3595        self.spatial_scale = spatial_scale
3596        self.sample_num = sample_num
3597        self.roi_end_mode = roi_end_mode
3598
3599
3600class Adam(Primitive):
3601    r"""
3602    Updates gradients by the Adaptive Moment Estimation (Adam) algorithm.
3603
3604    The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
3605
3606    For more details, please refer to :class:`mindspore.nn.Adam`.
3607
3608    The updating formulas are as follows,
3609
3610    .. math::
3611        \begin{array}{ll} \\
3612            m = \beta_1 * m + (1 - \beta_1) * g \\
3613            v = \beta_2 * v + (1 - \beta_2) * g * g \\
3614            l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
3615            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
3616        \end{array}
3617
3618    :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents
3619    `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
3620    :math:`t` represents updating step while :math:`beta_1^t(\beta_1^{t})` and :math:`beta_2^t(\beta_2^{t})`
3621    represent `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`,
3622    :math:`\epsilon` represents
3623    `epsilon`.
3624
3625    Inputs of `var`, `m`, `v` and `gradient`
3626    comply with the implicit type conversion rules to make the data types consistent.
3627    If they have different data types, the lower priority data type will be converted to
3628    the relatively highest priority data type.
3629
3630    Args:
3631        use_locking (bool): Whether to enable a lock to protect variable tensors from being updated.
3632            If ``True`` , updates of the var, m, and v tensors will be protected by a lock.
3633            If ``False`` , the result is unpredictable. Default: ``False`` .
3634        use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
3635            If ``True`` , update the gradients using NAG.
3636            If ``False`` , update the gradients without using NAG. Default: ``False`` .
3637
3638    Inputs:
3639        - **var** (Parameter) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means,
3640          any number of additional dimensions. The data type can be float16 or float32.
3641        - **m** (Parameter) - The 1st moment vector in the updating formula,
3642          the shape should be the same as `var`.
3643        - **v** (Parameter) - the 2nd moment vector in the updating formula,
3644          the shape should be the same as `var`.
3645        - **beta1_power** (float) - :math:`beta_1^t(\beta_1^{t})` in the updating formula.
3646        - **beta2_power** (float) - :math:`beta_2^t(\beta_2^{t})` in the updating formula.
        - **lr** (float) - :math:`l` in the updating formula. The paper suggested value is :math:`0.001`.
3648        - **beta1** (float) - The exponential decay rate for the 1st moment estimations.
3649          The paper suggested value is :math:`0.9`.
3650        - **beta2** (float) - The exponential decay rate for the 2nd moment estimations.
3651          The paper suggested value is :math:`0.999`.
3652        - **epsilon** (float) - Term added to the denominator to improve numerical stability.
3653        - **gradient** (Tensor) - Gradient, has the same shape and data type as `var`.
3654
3655    Outputs:
        Tuple of 3 Tensors, the updated parameters.
3657
3658        - **var** (Tensor) - The same shape and data type as Inputs `var`.
3659        - **m** (Tensor) - The same shape and data type as Inputs `m`.
3660        - **v** (Tensor) - The same shape and data type as Inputs `v`.
3661
3662    Raises:
        TypeError: If `use_locking` or `use_nesterov` is not a bool.
        TypeError: If `var`, `m` or `v` is not a Parameter.
        TypeError: If `beta1_power`, `beta2_power`, `lr`, `beta1`, `beta2`, `epsilon` or `gradient` is not a Tensor.
3666
3667    Supported Platforms:
3668        ``Ascend`` ``GPU`` ``CPU``
3669
3670    Examples:
3671        >>> import mindspore
3672        >>> import numpy as np
3673        >>> from mindspore import Tensor, nn, ops
3674        >>> from mindspore import Parameter
3675        >>> class Net(nn.Cell):
3676        ...     def __init__(self):
3677        ...         super(Net, self).__init__()
3678        ...         self.apply_adam = ops.Adam()
3679        ...         self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var")
3680        ...         self.m = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="m")
3681        ...         self.v = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="v")
3682        ...     def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
3683        ...         out = self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
3684        ...                               epsilon, grad)
3685        ...         return out
3686        ...
3687        >>> net = Net()
3688        >>> gradient = Tensor(np.ones([2, 2]).astype(np.float32))
3689        >>> output = net(0.9, 0.999, 0.001, 0.9, 0.999, 1e-8, gradient)
3690        >>> print(net.var.asnumpy())
3691        [[0.9996838 0.9996838]
3692         [0.9996838 0.9996838]]
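        >>> # Editorial cross-check (a NumPy sketch, not part of the original example): replaying the
        >>> # update formulas above for one step, with m = v = var = grad = 1 and the scalars passed to
        >>> # net(...), reproduces the printed value.
        >>> m1 = 0.9 * 1.0 + (1 - 0.9) * 1.0
        >>> v1 = 0.999 * 1.0 + (1 - 0.999) * 1.0 * 1.0
        >>> l = 0.001 * np.sqrt(1 - 0.999) / (1 - 0.9)
        >>> round(float(1.0 - l * m1 / (np.sqrt(v1) + 1e-8)), 7)
        0.9996838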
3693    """
3694    __mindspore_signature__ = (
3695        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
3696        sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T1),
3697        sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T2),
3698        sig.make_sig('beta1_power', dtype=sig.sig_dtype.T3),
3699        sig.make_sig('beta2_power', dtype=sig.sig_dtype.T4),
3700        sig.make_sig('lr', dtype=sig.sig_dtype.T5),
3701        sig.make_sig('beta1', dtype=sig.sig_dtype.T6),
3702        sig.make_sig('beta2', dtype=sig.sig_dtype.T7),
3703        sig.make_sig('epsilon', dtype=sig.sig_dtype.T8),
3704        sig.make_sig('gradient', dtype=sig.sig_dtype.T)
3705    )
3706
3707    @prim_attr_register
3708    def __init__(self, use_locking=False, use_nesterov=False):
3709        """Initialize Adam."""
3710        validator.check_value_type("use_locking", use_locking, [bool], self.name)
3711        validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name)
3712        self.add_prim_attr('side_effect_mem', True)
3713
3714
3715class AdamNoUpdateParam(Primitive):
3716    r"""
    Updates gradients by the Adaptive Moment Estimation (Adam) algorithm. This operator does not update the
    parameter, but calculates the value that should be added to the parameter instead.
3719
3720    The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
3721
3722    The updating formulas are as follows,
3723
3724    .. math::
3725        \begin{array}{ll} \\
3726            m = \beta_1 * m + (1 - \beta_1) * g \\
3727            v = \beta_2 * v + (1 - \beta_2) * g * g \\
3728            l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
3729            \Delta{w} = - l * \frac{m}{\sqrt{v} + \epsilon}
3730        \end{array}
3731
3732    :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents
3733    `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
3734    :math:`t` represents updating step while :math:`beta_1^t(\beta_1^{t})` and :math:`beta_2^t(\beta_2^{t})`
3735    represent `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`,
3736    :math:`w` represents the parameter to be updated, :math:`\epsilon` represents `epsilon`.
3737
3738    Args:
3739        use_locking (bool): Whether to enable a lock to protect variable tensors from being updated.
3740            If ``True`` , updates of the var, m, and v tensors will be protected by a lock.
3741            If ``False`` , the result is unpredictable. Default: ``False`` .
3742        use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
3743            If ``True`` , update the gradients using NAG.
3744            If ``False`` , update the gradients without using NAG. Default: ``False`` .
3745
3746    Inputs:
3747        - **m** (Tensor) - The 1st moment vector in the updating formula. The shape is :math:`(N, *)`
3748          where :math:`*` means, any number of additional dimensions. The data type must be float32.
3749        - **v** (Tensor) - the 2nd moment vector in the updating formula. The shape must be the same as `m`.
3750          The data type must be float32.
3751        - **beta1_power** (Tensor) - :math:`beta_1^t(\beta_1^{t})` in the updating formula.
3752          The shape is :math:`(1, )` and the data type must be float32.
3753        - **beta2_power** (Tensor) - :math:`beta_2^t(\beta_2^{t})` in the updating formula.
3754          The shape is :math:`(1, )` and the data type must be float32.
3755        - **lr** (Tensor) - :math:`l` in the updating formula.
3756          The shape is :math:`(1, )` and the data type must be float32.
3757        - **beta1** (Tensor) - The exponential decay rate for the 1st moment estimations.
3758          The shape is :math:`(1, )` and the data type must be float32.
3759        - **beta2** (Tensor) - The exponential decay rate for the 2nd moment estimations.
3760          The shape is :math:`(1, )` and the data type must be float32.
3761        - **epsilon** (Tensor) - Term added to the denominator to improve numerical stability.
3762          The shape is :math:`(1, )` and the data type must be float32.
3763        - **gradient** (Tensor) - Gradient, the shape must be the same as `m`, the data type must be float32.
3764
3765    Outputs:
3766        Tensor, whose shape and data type are the same with Inputs `gradient`, is a value that should be added to the
3767        parameter to be updated.
3768
3769    Raises:
        TypeError: If `use_locking` or `use_nesterov` is not a bool.
        TypeError: If `m`, `v`, `beta1_power`, `beta2_power`, `lr`, `beta1`, `beta2`, `epsilon` or `gradient`
                   is not a Tensor.
3773
3774    Supported Platforms:
3775        ``CPU``
3776
3777    Examples:
        >>> import numpy as np
        >>> import mindspore as ms
        >>> from mindspore import Tensor, nn, ops, Parameter
        >>> class Net(nn.Cell):
3779        ...     def __init__(self):
3780        ...         super(Net, self).__init__()
3781        ...         self.adam = ops.AdamNoUpdateParam()
3782        ...         self.m = Parameter(Tensor(np.array([[0.1, 0.1, 0.1], [0.2, 0.2, 0.2]]).astype(np.float32)),
3783        ...                            name="m")
3784        ...         self.v = Parameter(Tensor(np.array([[0.1, 0.1, 0.1], [0.2, 0.2, 0.2]]).astype(np.float32)),
3785        ...                            name="v")
3786        ...     def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
3787        ...         out = self.adam(self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
3788        ...         return out
3789        >>> net = Net()
3790        >>> beta1_power = Tensor(0.9, ms.float32)
3791        >>> beta2_power = Tensor(0.999, ms.float32)
3792        >>> lr = Tensor(0.001, ms.float32)
3793        >>> beta1 = Tensor(0.9, ms.float32)
3794        >>> beta2 = Tensor(0.999, ms.float32)
3795        >>> epsilon = Tensor(1e-8, ms.float32)
3796        >>> gradient = Tensor(np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1]]).astype(np.float32))
3797        >>> result = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient)
3798        >>> print(result)
3799        [[-0.00010004 -0.00010004 -0.00010004]
         [-0.00013441 -0.00013441 -0.00013441]]
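        >>> # Editorial usage sketch (hypothetical parameter `w`, shown for illustration only): the
        >>> # returned tensor is the increment, so a caller applies it by adding it to the parameter.
        >>> w = Tensor(np.zeros((2, 3)).astype(np.float32))
        >>> updated_w = w + result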
3801
3802    """
3803
3804    @prim_attr_register
3805    def __init__(self, use_locking=False, use_nesterov=False):
3806        """Initialize AdamNoUpdateParam."""
3807        validator.check_value_type("use_locking", use_locking, [bool], self.name)
3808        validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name)
3809
3810
3811class FusedSparseAdam(Primitive):
3812    r"""
3813    Merges the duplicate value of the gradient and then updates parameters by the Adaptive Moment Estimation (Adam)
3814    algorithm. This operator is used when the gradient is sparse.
3815
3816    The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
3817
3818    The updating formulas are as follows,
3819
3820    .. math::
3821        \begin{array}{ll} \\
3822            m = \beta_1 * m + (1 - \beta_1) * g \\
3823            v = \beta_2 * v + (1 - \beta_2) * g * g \\
3824            l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
3825            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
3826        \end{array}
3827
3828    :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents
3829    `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
3830    :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent `beta1_power` and
3831    `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`, :math:`\epsilon` represents
3832    `epsilon`.
3833
3834    All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
3835    If they have different data types, the lower priority data type will be converted to
3836    the relatively highest priority data type.
3837
3838    Args:
3839        use_locking (bool): Whether to enable a lock to protect variable tensors from being updated.
3840            If ``True`` , updates of the var, m, and v tensors will be protected by a lock.
3841            If ``False`` , the result is unpredictable. Default: ``False`` .
3842        use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
3843            If ``True`` , update the gradients using NAG.
3844            If ``False`` , update the gradients without using NAG. Default: ``False`` .
3845
3846    Inputs:
3847        - **var** (Parameter) - Parameters to be updated with float32 data type. The shape is :math:`(N, *)`
3848          where :math:`*` means, any number of additional dimensions.
3849        - **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and data type as `var`.
3850        - **v** (Parameter) - The 2nd moment vector in the updating formula, has the same shape and data type as `var`.
3851          Mean square gradients, has the same type as `var` with float32 data type.
3852        - **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type.
3853          The shape is :math:`(1, )`.
3854        - **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type.
3855          The shape is :math:`(1, )`.
3856        - **lr** (Tensor) - :math:`l` in the updating formula. With float32 data type.
3857          The shape is :math:`(1, )`.
3858        - **beta1** (Tensor) - The exponential decay rate for the 1st moment estimations with float32 data type.
3859          The shape is :math:`(1, )`.
3860        - **beta2** (Tensor) - The exponential decay rate for the 2nd moment estimations with float32 data type.
3861          The shape is :math:`(1, )`.
3862        - **epsilon** (Tensor) - Term added to the denominator to improve numerical stability with float32 data type.
3863          The shape is :math:`(1, )`.
3864        - **gradient** (Tensor) - Gradient, has the same data type as `var` and
          gradient.shape[1:] = var.shape[1:] if the rank of `var` is greater than 1.
3866        - **indices** (Tensor) - Gradient indices with int32 data type and indices.shape[0] = gradient.shape[0].
3867
3868    Outputs:
        Tuple of 3 Tensors. This operator updates the input parameters in place, so the outputs have no practical use.
3870
3871        - **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
3872        - **m** (Tensor) - A Tensor with shape :math:`(1, )`.
3873        - **v** (Tensor) - A Tensor with shape :math:`(1, )`.
3874
3875    Raises:
        TypeError: If `use_locking` or `use_nesterov` is not a bool.
3877        TypeError: If dtype of `var`, `m`, `v`, `beta1_power`, `beta2_power`, `lr`, `beta1`, `beta2`, `epsilon`,
3878                   `gradient` or `indices` is not float32.
3879        RuntimeError: If the data type of all inputs except `indices` conversion of Parameter is not supported.
3880
3881    Supported Platforms:
3882        ``Ascend`` ``CPU``
3883
3884    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, nn, ops, Parameter
        >>> class Net(nn.Cell):
3886        ...     def __init__(self):
3887        ...         super(Net, self).__init__()
3888        ...         self.sparse_apply_adam = ops.FusedSparseAdam()
3889        ...         self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var")
3890        ...         self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m")
3891        ...         self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v")
3892        ...     def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):
3893        ...         out = self.sparse_apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
3894        ...                                      epsilon, grad, indices)
3895        ...         return out
3896        ...
3897        >>> net = Net()
3898        >>> beta1_power = Tensor(0.9, mindspore.float32)
3899        >>> beta2_power = Tensor(0.999, mindspore.float32)
3900        >>> lr = Tensor(0.001, mindspore.float32)
3901        >>> beta1 = Tensor(0.9, mindspore.float32)
3902        >>> beta2 = Tensor(0.999, mindspore.float32)
3903        >>> epsilon = Tensor(1e-8, mindspore.float32)
3904        >>> gradient = Tensor(np.array([[[0.1, 0.1]], [[0.1, 0.1]]]), mindspore.float32)
3905        >>> indices = Tensor([0, 1], mindspore.int32)
3906        >>> output = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
3907        >>> print(net.var.asnumpy())
3908        [[[0.9997121  0.9997121 ]]
3909         [[0.9997121  0.9997121 ]]
3910         [[0.99971527 0.99971527]]]
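        >>> # Editorial cross-check (a NumPy sketch, not part of the original example): rows of `var`
        >>> # whose index is absent from `indices` (row 2 here) still receive the dense update with a
        >>> # zero gradient, which reproduces the 0.99971527 value above (up to float32 rounding).
        >>> m1, v1 = 0.9 * 1.0, 0.999 * 1.0
        >>> l = 0.001 * np.sqrt(1 - 0.999) / (1 - 0.9)
        >>> round(float(1.0 - l * m1 / (np.sqrt(v1) + 1e-8)), 6)
        0.999715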
3911    """
3912    __mindspore_signature__ = (
3913        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
3914        sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
3915        sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
3916        sig.make_sig('beta1_power', dtype=sig.sig_dtype.T),
3917        sig.make_sig('beta2_power', dtype=sig.sig_dtype.T),
3918        sig.make_sig('lr', dtype=sig.sig_dtype.T),
3919        sig.make_sig('beta1', dtype=sig.sig_dtype.T),
3920        sig.make_sig('beta2', dtype=sig.sig_dtype.T),
3921        sig.make_sig('epsilon', dtype=sig.sig_dtype.T),
3922        sig.make_sig('grad', dtype=sig.sig_dtype.T),
3923        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
3924    )
3925
3926    @prim_attr_register
3927    def __init__(self, use_locking=False, use_nesterov=False):
3928        """Initialize FusedSparseAdam."""
3929        validator.check_value_type("use_locking", use_locking, [bool], self.name)
3930        validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name)
3931        self.init_prim_io_names(inputs=['var', 'm', 'v', 'beta1_power', 'beta2_power', 'lr', 'beta1', 'beta2',
3932                                        'epsilon', 'grad', 'indices'],
3933                                outputs=['var', 'm', 'v'])
3934        self.add_prim_attr('side_effect_mem', True)
3935
3936
3937class FusedSparseLazyAdam(Primitive):
3938    r"""
3939    Merges the duplicate value of the gradient and then updates parameters by the Adaptive Moment Estimation (Adam)
    algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the
    original Adam algorithm, as only the parameters at the current `indices` are updated.
3942
3943    The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
3944
3945    The updating formulas are as follows,
3946
3947    .. math::
3948        \begin{array}{ll} \\
3949            m = \beta_1 * m + (1 - \beta_1) * g \\
3950            v = \beta_2 * v + (1 - \beta_2) * g * g \\
3951            l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
3952            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
3953        \end{array}
3954
3955    :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents
3956    `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
3957    :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent `beta1_power` and
3958    `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`, :math:`\epsilon` represents
3959    `epsilon`.
3960
3961    All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
3962    If they have different data types, the lower priority data type will be converted to
3963    the relatively highest priority data type.
3964
3965    Args:
3966        use_locking (bool): Whether to enable a lock to protect variable tensors from being updated.
3967            If ``True`` , updates of the var, m, and v tensors will be protected by a lock.
3968            If ``False`` , the result is unpredictable. Default: ``False`` .
3969        use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
3970            If ``True`` , update the gradients using NAG.
3971            If ``False`` , update the gradients without using NAG. Default: ``False`` .
3972
3973    Inputs:
3974        - **var** (Parameter) - Parameters to be updated with float32 data type. The shape is :math:`(N, *)`
3975          where :math:`*` means, any number of additional dimensions.
3976        - **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and data type as `var`.
3977        - **v** (Parameter) - The 2nd moment vector in the updating formula, has the same shape and data type as `var`.
3978          Mean square gradients, has the same type as `var` with float32 data type.
3979        - **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type.
3980          The shape is :math:`(1, )`.
3981        - **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type.
3982          The shape is :math:`(1, )`.
3983        - **lr** (Tensor) - :math:`l` in the updating formula with float32 data type.
3984          The shape is :math:`(1, )`.
3985        - **beta1** (Tensor) - The exponential decay rate for the 1st moment estimations with float32 data type.
3986          The shape is :math:`(1, )`.
3987        - **beta2** (Tensor) - The exponential decay rate for the 2nd moment estimations with float32 data type.
3988          The shape is :math:`(1, )`.
3989        - **epsilon** (Tensor) - Term added to the denominator to improve numerical stability with float32 data type.
3990          The shape is :math:`(1, )`.
3991        - **gradient** (Tensor) - Gradient value with float32 data type and
          gradient.shape[1:] = var.shape[1:] if the rank of `var` is greater than 1.
3993        - **indices** (Tensor) - Gradient indices with int32 data type and indices.shape[0] = gradient.shape[0].
3994
3995    Outputs:
        Tuple of 3 Tensors. This operator updates the input parameters in place, so the outputs have no practical use.
3997
3998        - **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
3999        - **m** (Tensor) - A Tensor with shape :math:`(1, )`.
4000        - **v** (Tensor) - A Tensor with shape :math:`(1, )`.
4001
4002    Raises:
        TypeError: If `use_locking` or `use_nesterov` is not a bool.
        TypeError: If dtype of `var`, `m`, `v`, `beta1_power`, `beta2_power`, `lr`, `beta1`, `beta2`, `epsilon` or
                   `gradient` is not float32.
4006        TypeError: If dtype of `indices` is not int32.
4007        RuntimeError: If the data type of all inputs except `indices` conversion of Parameter is not supported.
4008
4009    Supported Platforms:
4010        ``Ascend`` ``CPU``
4011
4012    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, nn, ops, Parameter
        >>> class Net(nn.Cell):
4014        ...     def __init__(self):
4015        ...         super(Net, self).__init__()
4016        ...         self.sparse_apply_lazyadam = ops.FusedSparseLazyAdam()
4017        ...         self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var")
4018        ...         self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m")
4019        ...         self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v")
4020        ...     def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):
4021        ...         out = self.sparse_apply_lazyadam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1,
4022        ...                                          beta2, epsilon, grad, indices)
4023        ...         return out
4024        ...
4025        >>> net = Net()
4026        >>> beta1_power = Tensor(0.9, mindspore.float32)
4027        >>> beta2_power = Tensor(0.999, mindspore.float32)
4028        >>> lr = Tensor(0.001, mindspore.float32)
4029        >>> beta1 = Tensor(0.9, mindspore.float32)
4030        >>> beta2 = Tensor(0.999, mindspore.float32)
4031        >>> epsilon = Tensor(1e-8, mindspore.float32)
4032        >>> gradient = Tensor(np.array([[[0.1, 0.1]], [[0.1, 0.1]]]), mindspore.float32)
4033        >>> indices = Tensor([0, 1], mindspore.int32)
4034        >>> output = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
4035        >>> print(net.var.asnumpy())
4036        [[[0.9997121  0.9997121 ]]
4037         [[0.9997121  0.9997121 ]]
4038         [[1.         1.        ]]]
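        >>> # Editorial note (a sketch, not part of the original example): unlike FusedSparseAdam, rows
        >>> # absent from `indices` (row 2 here) are left untouched, which is why the last row above is
        >>> # still all ones.
        >>> bool((net.var.asnumpy()[2] == 1.0).all())
        True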
4039    """
4040    __mindspore_signature__ = (
4041        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4042        sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4043        sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4044        sig.make_sig('beta1_power', dtype=sig.sig_dtype.T),
4045        sig.make_sig('beta2_power', dtype=sig.sig_dtype.T),
4046        sig.make_sig('lr', dtype=sig.sig_dtype.T),
4047        sig.make_sig('beta1', dtype=sig.sig_dtype.T),
4048        sig.make_sig('beta2', dtype=sig.sig_dtype.T),
4049        sig.make_sig('epsilon', dtype=sig.sig_dtype.T),
4050        sig.make_sig('grad', dtype=sig.sig_dtype.T),
4051        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
4052    )
4053
4054    @prim_attr_register
4055    def __init__(self, use_locking=False, use_nesterov=False):
4056        """Initialize FusedSparseLazyAdam."""
4057        validator.check_value_type("use_locking", use_locking, [bool], self.name)
4058        validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name)
4059        self.init_prim_io_names(inputs=['var', 'm', 'v', 'beta1_power', 'beta2_power', 'lr', 'beta1', 'beta2',
4060                                        'epsilon', 'grad', 'indices'],
4061                                outputs=['var', 'm', 'v'])
4062        self.add_prim_attr('side_effect_mem', True)
4063
4064
4065class FusedSparseFtrl(Primitive):
4066    """
4067    Merges the duplicate value of the gradient and then updates relevant entries according to the FTRL-proximal scheme.
4068
4069    All inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
4070    If they have different data types, the lower priority data type will be converted to
4071    the relatively highest priority data type.
4072
4073    Args:
4074        lr (float): The learning rate value, must be positive.
4075        l1 (float): l1 regularization strength, must be greater than or equal to zero.
4076        l2 (float): l2 regularization strength, must be greater than or equal to zero.
4077        lr_power (float): Learning rate power controls how the learning rate decreases during training,
4078            must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
4079        use_locking (bool): Use locks for updating operation if True . Default: ``False`` .
4080
4081    Inputs:
4082        - **var** (Parameter) - The variable to be updated. The data type must be float32. The shape is :math:`(N, *)`
4083          where :math:`*` means, any number of additional dimensions.
4084        - **accum** (Parameter) - The accumulation to be updated, must be same type and shape as `var`.
4085        - **linear** (Parameter) - the linear coefficient to be updated, must be same type and shape as `var`.
4086        - **grad** (Tensor) - A tensor of the same type as `var` and
          grad.shape[1:] = var.shape[1:] if the rank of `var` is greater than 1.
4088        - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
4089          The type must be int32 and indices.shape[0] = grad.shape[0].
4090
4091    Outputs:
        Tuple of 3 Tensors. This operator updates the input parameters in place, so the outputs have no practical use.
4093
4094        - **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
4095        - **accum** (Tensor) - A Tensor with shape :math:`(1, )`.
4096        - **linear** (Tensor) - A Tensor with shape :math:`(1, )`.
4097
4098    Raises:
4099        TypeError: If `lr`, `l1`, `l2` or `lr_power` is not a float.
        ValueError: If `lr` is not positive, `l1` or `l2` is negative, or `lr_power` is greater than zero.
4101        TypeError: If dtype of `var` is not float32.
4102        TypeError: If dtype of `indices` is not int32.
        TypeError: If shape of `accum`, `linear` or `grad` is not the same as that of `var`.
        TypeError: If shape of `indices` is not the same as the shape of the first dimension of `grad`.
        RuntimeError: If the data type of all inputs except `indices` conversion of Parameter is not supported.
4106
4107    Supported Platforms:
4108        ``Ascend`` ``CPU``
4109
4110    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, nn, ops, Parameter
        >>> class SparseApplyFtrlNet(nn.Cell):
4112        ...     def __init__(self):
4113        ...         super(SparseApplyFtrlNet, self).__init__()
4114        ...         self.sparse_apply_ftrl = ops.FusedSparseFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
4115        ...         self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var")
4116        ...         self.accum = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="accum")
4117        ...         self.linear = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="linear")
4118        ...
4119        ...     def construct(self, grad, indices):
4120        ...         out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)
4121        ...         return out
4122        ...
4123        >>> net = SparseApplyFtrlNet()
4124        >>> grad = Tensor(np.array([[[0.1, 0.1]], [[0.1, 0.1]]]).astype(np.float32))
4125        >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
4126        >>> output = net(grad, indices)
4127        >>> print(net.var.asnumpy())
4128        [[[-0.00598256 -0.00598256]]
4129         [[-0.00598256 -0.00598256]]
4130         [[ 1.          1.        ]]]
4131    """
4132    __mindspore_signature__ = (
4133        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4134        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4135        sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4136        sig.make_sig('grad', dtype=sig.sig_dtype.T),
4137        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
4138    )
4139
4140    @prim_attr_register
4141    def __init__(self, lr, l1, l2, lr_power, use_locking=False):
4142        """Initialize FusedSparseFtrl."""
4143        self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'indices'],
4144                                outputs=['output'])
4145        self.add_prim_attr('side_effect_mem', True)
4146
4147        validator.check_value_type("lr", lr, [float], self.name)
4148        validator.check_value_type("l1", l1, [float], self.name)
4149        validator.check_value_type("l2", l2, [float], self.name)
4150        validator.check_value_type("lr_power", lr_power, [float], self.name)
4151        self.lr = validator.check_positive_float(lr, "lr", self.name)
4152        self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
4153        self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
4154        self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name)
4155        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
4156
4157
4158class FusedSparseProximalAdagrad(Primitive):
4159    r"""
4160    Merges the duplicate value of the gradient and then updates relevant entries according to the proximal adagrad
4161    algorithm.
4162
4163    .. math::
4164        \begin{array}{ll} \\
4165            accum += grad * grad \\
4166            \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}} \\
4167            var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
4168        \end{array}
4169
4170    All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
4171    If they have different data types, the lower priority data type will be converted to
4172    the relatively highest priority data type.
4173
4174    Args:
        use_locking (bool): If ``True`` , the `var` and `accum` tensors will be protected by a lock
            while being updated. Default: ``False`` .
4177
4178    Inputs:
4179        - **var** (Parameter) - Variable tensor to be updated. The data type must be float32.
4180          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4181        - **accum** (Parameter) - Variable tensor to be updated, has the same shape and data type as `var`.
4182        - **lr** (Tensor) - The learning rate value. The data type must be float32. The shape is :math:`(1, )`.
4183        - **l1** (Tensor) - l1 regularization strength. The data type must be float32. The shape is :math:`(1, )`.
4184        - **l2** (Tensor) - l2 regularization strength. The data type must be float32. The shape is :math:`(1, )`.
4185        - **grad** (Tensor) - A tensor of the same data type as `var` and
          grad.shape[1:] = var.shape[1:] if the rank of `var` is greater than 1.
4187        - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
4188          The type must be int32 and indices.shape[0] = grad.shape[0].
4189
4190    Outputs:
        Tuple of 2 Tensors. This operator updates the input parameters in place, so the outputs have no practical use.
4192
4193        - **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
4194        - **accum** (Tensor) - A Tensor with shape :math:`(1, )`.
4195
4196    Raises:
4197        TypeError: If `use_locking` is not a bool.
4198        TypeError: If dtype of `var`, `accum`, `lr`, `l1`, `l2` or `grad` is not float32.
4199        TypeError: If dtype of `indices` is not int32.
4200        RuntimeError: If the data type of all inputs except `indices` conversion of Parameter is not supported.
4201
4202    Supported Platforms:
4203        ``Ascend`` ``CPU``
4204
4205    Examples:
        >>> import mindspore
        >>> import numpy as np
        >>> from mindspore import Tensor, nn, ops, Parameter
        >>> class Net(nn.Cell):
4207        ...     def __init__(self):
4208        ...         super(Net, self).__init__()
4209        ...         self.sparse_apply_proximal_adagrad = ops.FusedSparseProximalAdagrad()
4210        ...         self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var")
4211        ...         self.accum = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="accum")
4212        ...         self.lr = Tensor(0.01, mindspore.float32)
4213        ...         self.l1 = Tensor(0.0, mindspore.float32)
4214        ...         self.l2 = Tensor(0.0, mindspore.float32)
4215        ...     def construct(self, grad, indices):
4216        ...         out = self.sparse_apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1,
4217        ...                                                  self.l2, grad, indices)
4218        ...         return out
4219        ...
4220        >>> net = Net()
4221        >>> grad = Tensor(np.array([[[0.1, 0.1]], [[0.1, 0.1]]]).astype(np.float32))
4222        >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
4223        >>> output = net(grad, indices)
4224        >>> print(net.var.asnumpy())
4225        [[[0.99900496 0.99900496]]
4226         [[0.99900496 0.99900496]]
4227         [[1.         1.        ]]]
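        >>> # Editorial cross-check (a NumPy sketch, not part of the original example): replaying the
        >>> # formulas above for one indexed element, with var = accum = 1, grad = 0.1 and l1 = l2 = 0
        >>> # (so var simply becomes prox_v), reproduces the printed value.
        >>> accum1 = 1.0 + 0.1 * 0.1
        >>> prox_v = 1.0 - 0.01 * 0.1 / np.sqrt(accum1)
        >>> round(float(prox_v), 8)
        0.99900496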
4228    """
4229    __mindspore_signature__ = (
4230        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4231        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4232        sig.make_sig('lr', dtype=sig.sig_dtype.T),
4233        sig.make_sig('l1', dtype=sig.sig_dtype.T),
4234        sig.make_sig('l2', dtype=sig.sig_dtype.T),
4235        sig.make_sig('grad', dtype=sig.sig_dtype.T),
4236        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
4237    )
4238
4239    @prim_attr_register
4240    def __init__(self, use_locking=False):
4241        """Initialize FusedSparseProximalAdagrad"""
4242        self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad', 'indices'],
4243                                outputs=['output'])
4244        self.add_prim_attr('side_effect_mem', True)
4245        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
4246
4247
4248class KLDivLoss(Primitive):
4249    r"""
4250    Computes the Kullback-Leibler divergence between the logits and the labels.
4251
    For tensors :math:`x` and :math:`target` of the same shape,
    the KLDivLoss is computed as follows,
4254
4255    .. math::
4256        L(x, target) = target \cdot (\log target - x)
4257
4258    Then,
4259
4260    .. math::
4261        \ell(x, target) = \begin{cases}
4262        L(x, target), & \text{if reduction} = \text{'none';}\\
4263        \operatorname{mean}(L(x, target)), & \text{if reduction} = \text{'mean';}\\
4264        \operatorname{sum}(L(x, target)) / x.\operatorname{shape}[0], & \text{if reduction} = \text{'batchmean';}\\
4265        \operatorname{sum}(L(x, target)),  & \text{if reduction} = \text{'sum'.}
4266        \end{cases}
4267
4268    where :math:`x` represents `logits`,
4269    :math:`target` represents `labels`, and
4270    :math:`\ell(x, target)` represents `output`.
4271
4272    Note:
4273        - On Ascend, float64 dtype is not currently supported.
4274        - The output aligns with the mathematical definition of Kullback-Leibler divergence
4275          only when `reduction` is set to ``'batchmean'``.
4276        - On Ascend, the value of `reduction` must be one of ``'batchmean'``, ``'none'`` or ``'sum'``.
4277        - On GPU, the value of `reduction` must be one of ``'mean'``, ``'none'`` or ``'sum'``.
4278        - On CPU, the value of `reduction` must be one of ``'mean'``, ``'batchmean'``, ``'none'``
4279          or ``'sum'``.
4280
4281    Args:
4282        reduction (str): Specifies the reduction to be applied to the output.
4283            Default: ``'mean'`` .
4284
4285            - ``'none'``: no reduction will be applied.
4286            - ``'mean'``: compute and return the mean of elements in the output.
4287            - ``'sum'``: the output elements will be summed.
4288            - ``'batchmean'``: average loss is taken over the batch, similar to the mean mode.
4289
4290    Inputs:
4291        - **logits** (Tensor) - The input Tensor. The data type must be float16, float32 or float64.
4292        - **labels** (Tensor) - The label Tensor which has the same shape and data type as `logits`.
4293
4294    Outputs:
4295        Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`.
4296        Otherwise it is a scalar.
4297
4298    Raises:
4299        TypeError: If `reduction` is not a str.
4300        TypeError: If neither `logits` nor `labels` is a Tensor.
4301        TypeError: If dtype of `logits` or `labels` is not currently supported.
4302        ValueError: If shape of `logits` is not the same as `labels`.
4303        RuntimeError: If `logits` or `labels` is a scalar when `reduction` is 'batchmean'.
4304
4305    Supported Platforms:
4306        ``Ascend`` ``GPU`` ``CPU``
4307
4308    Examples:
4309        >>> import mindspore
4310        >>> import numpy as np
4311        >>> from mindspore import Tensor, nn, ops
4312        >>> class Net(nn.Cell):
4313        ...     def __init__(self):
4314        ...         super(Net, self).__init__()
4315        ...         self.kldiv_loss = ops.KLDivLoss(reduction='sum')
4316        ...     def construct(self, logits, labels):
4317        ...         result = self.kldiv_loss(logits, labels)
4318        ...         return result
4319        ...
4320        >>> net = Net()
4321        >>> logits = Tensor(np.array([0.2, 0.7, 0.1]), mindspore.float32)
4322        >>> labels = Tensor(np.array([0., 1., 0.]), mindspore.float32)
4323        >>> output = net(logits, labels)
4324        >>> print(output)
4325        -0.7
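        >>> # Editorial cross-check (a NumPy sketch, not part of the original example): with
        >>> # reduction='sum' the loss is sum(target * (log(target) - x)); terms where target == 0
        >>> # contribute 0, so only the non-zero label needs to be evaluated here.
        >>> labels_np, logits_np = labels.asnumpy(), logits.asnumpy()
        >>> mask = labels_np > 0
        >>> round(float(np.sum(labels_np[mask] * (np.log(labels_np[mask]) - logits_np[mask]))), 6)
        -0.7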
4326    """
4327
4328    @prim_attr_register
4329    def __init__(self, reduction='mean'):
4330        """Initialize KLDivLoss."""
4331        device_target = context.get_context("device_target")
4332        if device_target == "CPU":
4333            support_mode = ['none', 'mean', 'batchmean', 'sum']
4334        elif device_target == "GPU":
4335            support_mode = ['none', 'mean', 'sum']
4336        elif device_target == "Ascend":
4337            support_mode = ['none', 'batchmean', 'sum', 'mean']
4338        else:
4339            raise ValueError(f"'{self.name}' unknown device target: '{device_target}'")
4340
4341        self.reduction = validator.check_string(reduction, support_mode, 'reduction', self.name)
4342
4343
4344class ApplyAdaMax(Primitive):
4345    r"""
4346    Updates relevant entries according to the adamax scheme.
4347
4348    The updating formulas are as follows,
4349
4350    .. math::
4351        \begin{array}{ll} \\
4352            m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
4353            v_{t+1} = \max(\beta_2 * v_{t}, \left| g \right|) \\
4354            var = var - \frac{l}{1 - \beta_1^{t+1}} * \frac{m_{t+1}}{v_{t+1} + \epsilon}
4355        \end{array}
4356
4357    :math:`t` represents updating step while :math:`m` represents the 1st moment vector, :math:`m_{t}`
4358    is the last moment of :math:`m_{t+1}`, :math:`v` represents the 2nd moment vector, :math:`v_{t}`
4359    is the last moment of :math:`v_{t+1}`, :math:`l` represents scaling factor `lr`,
4360    :math:`g` represents `grad`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
4361    :math:`\beta_1^{t+1}` represents `beta1_power`, :math:`var` represents the variable to be updated,
4362    :math:`\epsilon` represents `epsilon`.
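
    As a plain NumPy sketch (an illustration reusing the values from the example below,
    not this operator's kernel), the update above can be written as::

        import numpy as np
        # values match the Examples section of this docstring
        var = np.array([[0.6, 0.4], [0.1, 0.5]], np.float32)
        m = np.array([[0.6, 0.5], [0.2, 0.6]], np.float32)
        v = np.array([[0.9, 0.1], [0.7, 0.8]], np.float32)
        grad = np.array([[0.3, 0.7], [0.1, 0.8]], np.float32)
        beta1, beta2, beta1_power, lr, epsilon = 0.9, 0.99, 0.9, 0.001, 1e-10
        # update rule from the formula above
        m = beta1 * m + (1 - beta1) * grad
        v = np.maximum(beta2 * v, np.abs(grad))
        var = var - lr / (1 - beta1_power) * m / (v + epsilon)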
4363
4364    Inputs of `var`, `m`, `v` and `grad` comply with the implicit type conversion rules
4365    to make the data types consistent.
4366    If they have different data types, the lower priority data type will be converted to
4367    the relatively highest priority data type.
4368
4369    Inputs:
4370        - **var** (Parameter) - Variable to be updated. With float32 or float16 data type.
4371          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4372        - **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape as `var`.
4373          With float32 or float16 data type.
4374        - **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients
4375          with the same shape as `var`. With float32 or float16 data type.
4376        - **beta1_power** (Union[Number, Tensor]) - :math:`\beta_1^{t+1}` in the updating formula, must be a scalar.
4377          With float32 or float16 data type.
4378        - **lr** (Union[Number, Tensor]) - Learning rate, :math:`l` in the updating formula, must be a scalar.
4379          With float32 or float16 data type.
4380        - **beta1** (Union[Number, Tensor]) - The exponential decay rate for the 1st moment estimations,
4381          must be a scalar. With float32 or float16 data type.
4382        - **beta2** (Union[Number, Tensor]) - The exponential decay rate for the 2nd moment estimations,
4383          must be a scalar. With float32 or float16 data type.
4384        - **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be a scalar.
4385          With float32 or float16 data type.
4386        - **grad** (Tensor) - A tensor for gradient, has the same shape as `var`.
4387          With float32 or float16 data type.
4388
4389    Outputs:
4390        Tuple of 3 Tensor, the updated parameters.
4391
4392        - **var** (Tensor) - The same shape and data type as `var`.
4393        - **m** (Tensor) - The same shape and data type as `m`.
4394        - **v** (Tensor) - The same shape and data type as `v`.
4395
4396    Raises:
4397        TypeError: If dtype of `var`, `m`, `v`, `beta1_power`, `lr`, `beta1`, `beta2`, `epsilon` or `grad` is neither
4398                   float16 nor float32.
4399        TypeError: If `beta1_power`, `lr`, `beta1`, `beta2` or `epsilon` is neither a Number nor a Tensor.
4400        TypeError: If `grad` is not a Tensor.
4401        TypeError: If the data type of `var`, `m`, `v` and `grad` conversion of Parameter is not supported.
4402
4403    Supported Platforms:
4404        ``Ascend`` ``GPU`` ``CPU``
4405
4406    Examples:
4407        >>> import mindspore
4408        >>> import numpy as np
4409        >>> from mindspore import Tensor, nn, ops, Parameter
4410        >>> class Net(nn.Cell):
4411        ...     def __init__(self):
4412        ...         super(Net, self).__init__()
4413        ...         self.apply_ada_max = ops.ApplyAdaMax()
4414        ...         self.var = Parameter(Tensor(np.array([[0.6, 0.4],
4415        ...                                               [0.1, 0.5]]).astype(np.float32)), name="var")
4416        ...         self.m = Parameter(Tensor(np.array([[0.6, 0.5],
4417        ...                                             [0.2, 0.6]]).astype(np.float32)), name="m")
4418        ...         self.v = Parameter(Tensor(np.array([[0.9, 0.1],
4419        ...                                             [0.7, 0.8]]).astype(np.float32)), name="v")
4420        ...     def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad):
4421        ...         out = self.apply_ada_max(self.var, self.m, self.v, beta1_power, lr, beta1, beta2, epsilon, grad)
4422        ...         return out
4423        ...
4424        >>> net = Net()
4425        >>> beta1_power = Tensor(0.9, mindspore.float32)
4426        >>> lr = Tensor(0.001, mindspore.float32)
4427        >>> beta1 = Tensor(0.9, mindspore.float32)
4428        >>> beta2 = Tensor(0.99, mindspore.float32)
4429        >>> epsilon = Tensor(1e-10, mindspore.float32)
4430        >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
4431        >>> output = net(beta1_power, lr, beta1, beta2, epsilon, grad)
4432        >>> print(output)
4433        (Tensor(shape=[2, 2], dtype=Float32, value=
4434        [[ 5.93602717e-01,  3.92571449e-01],
4435         [ 9.72582996e-02,  4.92249995e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
4436        [[ 5.69999993e-01,  5.19999981e-01],
4437         [ 1.89999998e-01,  6.20000005e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
4438        [[ 8.90999973e-01,  6.99999988e-01],
4439         [ 6.93000019e-01,  8.00000012e-01]]))
4440    """
4441
4442    __mindspore_signature__ = (
4443        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4444        sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4445        sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4446        sig.make_sig('beta1_power', dtype=sig.sig_dtype.T1),
4447        sig.make_sig('lr', dtype=sig.sig_dtype.T2),
4448        sig.make_sig('beta1', dtype=sig.sig_dtype.T3),
4449        sig.make_sig('beta2', dtype=sig.sig_dtype.T4),
4450        sig.make_sig('epsilon', dtype=sig.sig_dtype.T5),
4451        sig.make_sig('grad', dtype=sig.sig_dtype.T)
4452    )
4453
4454    @prim_attr_register
4455    def __init__(self):
4456        """Initialize ApplyAdaMax"""
4457        self.add_prim_attr('side_effect_mem', True)
4458
4459
4460class ApplyAdadelta(Primitive):
4461    r"""
4462    Updates relevant entries according to the adadelta scheme.
4463
4464    The Adadelta algorithm is proposed in
4465    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_.
4466
4467    .. math::
4468        \begin{array}{ll} \\
4469            \text{accum} = \rho * \text{accum} + (1 - \rho) * \text{grad}^2 \\
4470            \text{update} = \sqrt{\text{accum_update} +
4471              \epsilon} * \frac{\text{grad}}{\sqrt{\text{accum} + \epsilon}} \\
4472            \text{accum_update} = \rho * \text{accum_update} + (1 - \rho) * \text{update}^2 \\
4473            \text{var} = \text{var} - \text{lr} * \text{update}
4474        \end{array}
4475
4476    where :math:`\rho` represents `rho`, :math:`\epsilon` represents `epsilon`.
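
    As a plain NumPy sketch (an illustration reusing the values from the example below,
    not this operator's kernel), the update above can be written as::

        import numpy as np
        # values match the Examples section of this docstring
        var = np.array([[0.6, 0.4], [0.1, 0.5]], np.float32)
        accum = np.array([[0.6, 0.5], [0.2, 0.6]], np.float32)
        accum_update = np.array([[0.9, 0.1], [0.7, 0.8]], np.float32)
        grad = np.array([[0.3, 0.7], [0.1, 0.8]], np.float32)
        lr, rho, epsilon = 0.001, 0.0, 1e-6
        # update rule from the formula above
        accum = rho * accum + (1 - rho) * grad * grad
        update = np.sqrt(accum_update + epsilon) * grad / np.sqrt(accum + epsilon)
        accum_update = rho * accum_update + (1 - rho) * update * update
        var = var - lr * update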
4477
4478    Inputs of `var`, `accum`, `accum_update` and `grad` comply with the implicit type conversion rules
4479    to make the data types consistent.
4480    If they have different data types, the lower priority data type will be converted to
4481    the relatively highest priority data type.
4482
4483    Inputs:
4484        - **var** (Parameter) - Weights to be updated. With float32 or float16 data type.
4485          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4486        - **accum** (Parameter) - Accumulation to be updated, has the same shape and data type as `var`.
4487        - **accum_update** (Parameter) - Accum_update to be updated, has the same shape and data type as `var`.
4488        - **lr** (Union[Number, Tensor]) - Learning rate, must be a scalar. With float32 or float16 data type.
4489        - **rho** (Union[Number, Tensor]) - Decay rate, must be a scalar. With float32 or float16 data type.
4490        - **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be a scalar.
4491          With float32 or float16 data type.
4492        - **grad** (Tensor) - Gradients, has the same shape and data type as `var`.
4493
4494    Outputs:
4495        Tuple of 3 Tensor, the updated parameters.
4496
4497        - **var** (Tensor) - The same shape and data type as `var`.
4498        - **accum** (Tensor) - The same shape and data type as `accum`.
4499        - **accum_update** (Tensor) - The same shape and data type as `accum_update`.
4500
4501    Raises:
4502        TypeError: If dtype of `var`, `accum`, `accum_update`, `lr`, `rho`, `epsilon` or `grad` is neither float16 nor
4503                   float32.
4504        TypeError: If `accum_update`, `lr`, `rho` or `epsilon` is neither a Number nor a Tensor.
4505        TypeError: If the data type of `var`, `accum`, `accum_update` and `grad` conversion of Parameter
4506                      is not supported.
4507
4508    Supported Platforms:
4509        ``Ascend`` ``GPU`` ``CPU``
4510
4511    Examples:
4512        >>> import numpy as np
4513        >>> import mindspore
4514        >>> from mindspore import nn, Tensor, ops, Parameter
4515        >>> class Net(nn.Cell):
4516        ...     def __init__(self):
4517        ...         super(Net, self).__init__()
4518        ...         self.apply_adadelta = ops.ApplyAdadelta()
4519        ...         self.var = Parameter(Tensor(np.array([[0.6, 0.4],
4520        ...                                               [0.1, 0.5]]).astype(np.float32)), name="var")
4521        ...         self.accum = Parameter(Tensor(np.array([[0.6, 0.5],
4522        ...                                                 [0.2, 0.6]]).astype(np.float32)), name="accum")
4523        ...         self.accum_update = Parameter(Tensor(np.array([[0.9, 0.1],
4524        ...                                                        [0.7, 0.8]]).astype(np.float32)),
4525        ...                                                             name="accum_update")
4526        ...     def construct(self, lr, rho, epsilon, grad):
4527        ...         out = self.apply_adadelta(self.var, self.accum, self.accum_update, lr, rho, epsilon, grad)
4528        ...         return out
4529        ...
4530        >>> net = Net()
4531        >>> lr = Tensor(0.001, mindspore.float32)
4532        >>> rho = Tensor(0.0, mindspore.float32)
4533        >>> epsilon = Tensor(1e-6, mindspore.float32)
4534        >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
4535        >>> output = net(lr, rho, epsilon, grad)
4536        >>> print(output)
4537        (Tensor(shape=[2, 2], dtype=Float32, value=
4538        [[ 5.99051356e-01,  3.99683774e-01],
4539         [ 9.91633832e-02,  4.99105573e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
4540        [[ 9.00000036e-02,  4.89999980e-01],
4541         [ 1.00000007e-02,  6.40000045e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
4542        [[ 8.99990857e-01,  1.00000791e-01],
4543         [ 6.99930906e-01,  7.99999774e-01]]))
4544    """
4545
4546    __mindspore_signature__ = (
4547        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4548        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4549        sig.make_sig('accum_update', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4550        sig.make_sig('lr', dtype=sig.sig_dtype.T1),
4551        sig.make_sig('rho', dtype=sig.sig_dtype.T2),
4552        sig.make_sig('epsilon', dtype=sig.sig_dtype.T3),
4553        sig.make_sig('grad', dtype=sig.sig_dtype.T)
4554    )
4555
4556    @prim_attr_register
4557    def __init__(self):
4558        """Initialize ApplyAdadelta"""
4559        self.add_prim_attr('side_effect_mem', True)
4560
4561
4562class ApplyAdagrad(Primitive):
4563    r"""
4564    Updates relevant entries according to the adagrad scheme.
4565    The Adagrad algorithm was proposed in
4566    `Adaptive Subgradient Methods for Online Learning and Stochastic Optimization
4567    <http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf>`_.
4568    This module can adaptively assign a different learning rate to each parameter, accounting for the uneven
4569    number of samples that contribute to different parameters.
4570
4571    .. math::
4572        \begin{array}{ll} \\
4573            accum += grad * grad \\
4574            var -= lr * grad * \frac{1}{\sqrt{accum}}
4575        \end{array}
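
    As a plain NumPy sketch (an illustration reusing the values from the example below,
    not this operator's kernel), the update above can be written as::

        import numpy as np
        # values match the Examples section of this docstring
        var = np.array([[0.6, 0.4], [0.1, 0.5]], np.float32)
        accum = np.array([[0.6, 0.5], [0.2, 0.6]], np.float32)
        grad = np.array([[0.3, 0.7], [0.1, 0.8]], np.float32)
        lr = 0.001
        # update rule from the formula above
        accum += grad * grad
        var -= lr * grad / np.sqrt(accum)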
4576
4577    Inputs of `var`, `accum` and `grad`  comply with the implicit type conversion rules
4578    to make the data types consistent.
4579    If they have different data types, the lower priority data type will be converted to
4580    the relatively highest priority data type.
4581
4582    Args:
4583        update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` .
4584
4585    Inputs:
4586        - **var** (Parameter) - Variable to be updated. With float or complex data type.
4587          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4588        - **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
4589        - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With float or complex data type.
4590        - **grad** (Tensor) - A tensor for gradient. The shape must be the same as `var`.
4591
4592    Outputs:
4593        Tuple of 2 Tensors, the updated parameters.
4594
4595        - **var** (Tensor) - The same shape and data type as `var`.
4596        - **accum** (Tensor) - The same shape and data type as `accum`.
4597
4598    Raises:
4599        TypeError: If dtype of `var`, `accum`, `lr` or `grad` is neither float nor complex.
4600        TypeError: If `lr` is neither a Number nor a Tensor.
4601        TypeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
4602
4603    Supported Platforms:
4604        ``Ascend`` ``GPU`` ``CPU``
4605
4606    Examples:
4607        >>> import mindspore
4608        >>> import numpy as np
4609        >>> from mindspore import Tensor, nn, ops, Parameter
4610        >>> class Net(nn.Cell):
4611        ...     def __init__(self):
4612        ...         super(Net, self).__init__()
4613        ...         self.apply_adagrad = ops.ApplyAdagrad()
4614        ...         self.var = Parameter(Tensor(np.array([[0.6, 0.4],
4615        ...                                               [0.1, 0.5]]).astype(np.float32)), name="var")
4616        ...         self.accum = Parameter(Tensor(np.array([[0.6, 0.5],
4617        ...                                                 [0.2, 0.6]]).astype(np.float32)), name="accum")
4618        ...     def construct(self, lr, grad):
4619        ...         out = self.apply_adagrad(self.var, self.accum, lr, grad)
4620        ...         return out
4621        ...
4622        >>> net = Net()
4623        >>> lr = Tensor(0.001, mindspore.float32)
4624        >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
4625        >>> output = net(lr, grad)
4626        >>> print(output)
4627        (Tensor(shape=[2, 2], dtype=Float32, value=
4628        [[ 5.99638879e-01,  3.99296492e-01],
4629         [ 9.97817814e-02,  4.99281585e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
4630        [[ 6.90000057e-01,  9.90000010e-01],
4631         [ 2.10000008e-01,  1.24000001e+00]]))
4632    """
4633
4634    __mindspore_signature__ = (
4635        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4636        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4637        sig.make_sig('lr', dtype=sig.sig_dtype.T1),
4638        sig.make_sig('grad', dtype=sig.sig_dtype.T)
4639    )
4640
4641    @prim_attr_register
4642    def __init__(self, update_slots=True):
4643        """Initialize ApplyAdagrad."""
4644        validator.check_value_type("update_slots", update_slots, [bool], self.name)
4645        self.add_prim_attr('side_effect_mem', True)
4646
4647
4648class ApplyAdagradV2(Primitive):
4649    r"""
4650    Updates relevant entries according to the adagradv2 scheme.
4651
4652    .. math::
4653        \begin{array}{ll} \\
4654            accum += grad * grad \\
4655            var -= lr * grad * \frac{1}{\sqrt{accum} + \epsilon}
4656        \end{array}
4657
4658    where :math:`\epsilon` represents `epsilon`.
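
    As a plain NumPy sketch (an illustration reusing the values from the example below,
    not this operator's kernel), the update above can be written as::

        import numpy as np
        # values match the Examples section of this docstring
        var = np.array([[0.6, 0.4], [0.1, 0.5]], np.float32)
        accum = np.array([[0.6, 0.5], [0.2, 0.6]], np.float32)
        grad = np.array([[0.3, 0.7], [0.1, 0.8]], np.float32)
        lr, epsilon = 0.001, 1e-6
        # update rule from the formula above
        accum += grad * grad
        var -= lr * grad / (np.sqrt(accum) + epsilon)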
4659
4660    Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
4661    to make the data types consistent.
4662    If they have different data types, the lower priority data type will be converted to
4663    the relatively highest priority data type.
4664
4665    Note:
4666        Compared with `ApplyAdagrad`, `ApplyAdagradV2` adds a small constant :math:`\epsilon` to the denominator.
4667
4668    Args:
4669        epsilon (float): A small value added for numerical stability.
4670        update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` .
4671
4672    Inputs:
4673        - **var** (Parameter) - Variable to be updated. With float16 or float32 data type.
4674          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4675        - **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
4676        - **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
4677          a scalar tensor with float16 or float32 data type.
4678        - **grad** (Tensor) - A tensor for gradient. The shape must be the same as `var`.
4679
4680    Outputs:
4681        Tuple of 2 Tensors, the updated parameters.
4682
4683        - **var** (Tensor) - The same shape and data type as `var`.
4684        - **accum** (Tensor) - The same shape and data type as `accum`.
4685
4686    Raises:
4687        TypeError: If dtype of `var`, `accum`, `lr` or `grad` is neither float16 nor float32.
4688        TypeError: If `lr` is neither a Number nor a Tensor.
4689        TypeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
4690
4691    Supported Platforms:
4692        ``Ascend`` ``GPU`` ``CPU``
4693
4694    Examples:
4695        >>> import mindspore
4696        >>> import numpy as np
4697        >>> from mindspore import Tensor, nn, ops, Parameter
4698        >>> class Net(nn.Cell):
4699        ...     def __init__(self):
4700        ...         super(Net, self).__init__()
4701        ...         self.apply_adagrad_v2 = ops.ApplyAdagradV2(epsilon=1e-6)
4702        ...         self.var = Parameter(Tensor(np.array([[0.6, 0.4],
4703        ...                                               [0.1, 0.5]]).astype(np.float32)), name="var")
4704        ...         self.accum = Parameter(Tensor(np.array([[0.6, 0.5],
4705        ...                                                 [0.2, 0.6]]).astype(np.float32)), name="accum")
4706        ...     def construct(self, lr, grad):
4707        ...         out = self.apply_adagrad_v2(self.var, self.accum, lr, grad)
4708        ...         return out
4709        ...
4710        >>> net = Net()
4711        >>> lr = Tensor(0.001, mindspore.float32)
4712        >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
4713        >>> output = net(lr, grad)
4714        >>> print(output)
4715        (Tensor(shape=[2, 2], dtype=Float32, value=
4716        [[ 5.99638879e-01,  3.99296492e-01],
4717         [ 9.97817814e-02,  4.99281585e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
4718        [[ 6.90000057e-01,  9.90000010e-01],
4719         [ 2.10000008e-01,  1.24000001e+00]]))
4720    """
4721
4722    __mindspore_signature__ = (
4723        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4724        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4725        sig.make_sig('lr', dtype=sig.sig_dtype.T1),
4726        sig.make_sig('grad', dtype=sig.sig_dtype.T)
4727    )
4728
4729    @prim_attr_register
4730    def __init__(self, epsilon, update_slots=True):
4731        """Initialize ApplyAdagradV2."""
4732        validator.check_value_type("epsilon", epsilon, [float], self.name)
4733        validator.check_value_type("update_slots", update_slots, [bool], self.name)
4734        self.add_prim_attr('side_effect_mem', True)
4735
4736
4737class SparseApplyAdagrad(Primitive):
4738    """
4739    Deprecated
4740    """
4741
4742    __mindspore_signature__ = (
4743        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4744        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4745        sig.make_sig('grad', dtype=sig.sig_dtype.T),
4746        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
4747    )
4748
4749    @deprecated("1.9", "SparseApplyAdagrad", False)
4750    @prim_attr_register
4751    def __init__(self, lr, update_slots=True, use_locking=False):
4752        """Initialize SparseApplyAdagrad."""
4753        validator.check_is_float(lr, "lr", self.name)
4754        validator.check_value_type("update_slots", update_slots, [bool], self.name)
4755        validator.check_value_type("use_locking", use_locking, [bool], self.name)
4756        self.add_prim_attr('side_effect_mem', True)
4757
4758
4759class SparseApplyAdagradV2(Primitive):
4760    r"""
4761    Updates relevant entries according to the adagrad scheme; unlike `SparseApplyAdagrad`, it adds an `epsilon` term.
4762
4763    .. math::
4764        \begin{array}{ll} \\
4765            accum += grad * grad \\
4766            var -= lr * grad * \frac{1}{\sqrt{accum} + \epsilon}
4767        \end{array}
4768
4769    where :math:`\epsilon` represents `epsilon`.
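
    As a plain NumPy sketch (an illustration reusing the values from the example below,
    not this operator's kernel), the row-wise sparse update can be written as::

        import numpy as np
        # values match the Examples section of this docstring
        var = np.array([[0.2]], np.float32)
        accum = np.array([[0.1]], np.float32)
        grad = np.array([[0.7]], np.float32)
        indices = np.array([0], np.int32)
        lr, epsilon = 1e-8, 1e-6
        # only the rows selected by `indices` are updated
        for i, idx in enumerate(indices):
            accum[idx] += grad[i] * grad[i]
            var[idx] -= lr * grad[i] / (np.sqrt(accum[idx]) + epsilon)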
4770
4771    Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
4772    to make the data types consistent.
4773    If they have different data types, the lower priority data type will be converted to
4774    the relatively highest priority data type.
4775
4776    Args:
4777        lr (float): Learning rate.
4778        epsilon (float): A small value added for numerical stability.
4779        use_locking (bool): If ``True`` , the updates of the `var` and `accum` tensors will be protected by a lock.
4780            Default: ``False`` .
4781        update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` .
4782
4783    Inputs:
4784        - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
4785          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4786        - **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
4787        - **grad** (Tensor) - Gradient, with the same data type as `var`, and
4788          :math:`grad.shape[1:] = var.shape[1:]` if the rank of `var` is greater than 1.
4789        - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
4790          The type must be int32 and :math:`indices.shape[0] = grad.shape[0]`.
4791
4792    Outputs:
4793        Tuple of 2 tensors, the updated parameters.
4794
4795        - **var** (Tensor) - The same shape and data type as `var`.
4796        - **accum** (Tensor) - The same shape and data type as `accum`.
4797
4798    Raises:
4799        TypeError: If `lr` or `epsilon` is not a float.
4800        TypeError: If `update_slots` or `use_locking` is not a bool.
4801        TypeError: If dtype of `var`, `accum` or `grad` is neither float16 nor float32.
4802        TypeError: If dtype of `indices` is not int32.
4803        RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
4804
4805    Supported Platforms:
4806        ``Ascend`` ``GPU`` ``CPU``
4807
4808    Examples:
4809        >>> import mindspore
4810        >>> import numpy as np
4811        >>> from mindspore import Tensor, nn, ops, Parameter
4812        >>> class Net(nn.Cell):
4813        ...     def __init__(self):
4814        ...         super(Net, self).__init__()
4815        ...         self.sparse_apply_adagrad_v2 = ops.SparseApplyAdagradV2(lr=1e-8, epsilon=1e-6)
4816        ...         self.var = Parameter(Tensor(np.array([[0.2]]).astype(np.float32)), name="var")
4817        ...         self.accum = Parameter(Tensor(np.array([[0.1]]).astype(np.float32)), name="accum")
4818        ...
4819        ...     def construct(self, grad, indices):
4820        ...         out = self.sparse_apply_adagrad_v2(self.var, self.accum, grad, indices)
4821        ...         return out
4822        ...
4823        >>> net = Net()
4824        >>> grad = Tensor(np.array([[0.7]]).astype(np.float32))
4825        >>> indices = Tensor(np.array([0]), mindspore.int32)
4826        >>> output = net(grad, indices)
4827        >>> print(output)
4828        (Tensor(shape=[1, 1], dtype=Float32, value=
4829        [[ 1.99999988e-01]]), Tensor(shape=[1, 1], dtype=Float32, value=
4830        [[ 5.89999974e-01]]))
4831    """
4832
4833    __mindspore_signature__ = (
4834        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4835        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4836        sig.make_sig('grad', dtype=sig.sig_dtype.T),
4837        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
4838    )
4839
4840    @prim_attr_register
4841    def __init__(self, lr, epsilon, use_locking=False, update_slots=True):
4842        """Initialize SparseApplyAdagradV2."""
4843        self.lr = validator.check_value_type("lr", lr, [float], self.name)
4844        self.epsilon = validator.check_value_type("epsilon", epsilon, [float], self.name)
4845        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
4846        self.update_slots = validator.check_value_type("update_slots", update_slots, [bool], self.name)
4847        self.add_prim_attr('side_effect_mem', True)
4848
4849
4850class ApplyProximalAdagrad(Primitive):
4851    r"""
4852    Updates relevant entries according to the proximal adagrad algorithm.
4853    The proximal adagrad algorithm was proposed in `Efficient Learning using Forward-Backward Splitting
4854    <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_.
4855
4856    .. math::
4857        \begin{array}{ll} \\
4858            accum += grad * grad \\
4859            \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}} \\
4860            var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
4861        \end{array}
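
    As a plain NumPy sketch (an illustration reusing the values from the example below,
    not this operator's kernel), the update above can be written as::

        import numpy as np
        # values match the Examples section of this docstring
        var = np.array([[0.6, 0.4], [0.1, 0.5]], np.float32)
        accum = np.array([[0.6, 0.5], [0.2, 0.6]], np.float32)
        grad = np.array([[0.3, 0.7], [0.1, 0.8]], np.float32)
        lr, l1, l2 = 0.01, 0.0, 0.0
        # update rule from the formula above
        accum += grad * grad
        prox_v = var - lr * grad / np.sqrt(accum)
        var = np.sign(prox_v) / (1 + lr * l2) * np.maximum(np.abs(prox_v) - lr * l1, 0)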
4862
4863    Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
4864    to make the data types consistent.
4865    If they have different data types, the lower priority data type will be converted to
4866    the relatively highest priority data type.
4867
4868    Args:
4869        use_locking (bool): If ``True`` , the updates of the `var` and `accum` tensors will be protected by a lock.
4870            Default: ``False`` .
4871
4872    Inputs:
4873        - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
4874          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4875        - **accum** (Parameter) - Accumulation to be updated, must have the same shape and dtype as `var`.
4876        - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. The data type must be
4877          float16 or float32.
4878        - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a scalar. The data type must be
4879          float16 or float32.
4880        - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a scalar. The data type must be
4881          float16 or float32.
4882        - **grad** (Tensor) - Gradient with the same shape and dtype as `var`.
4883
4884    Outputs:
4885        Tuple of 2 Tensors, the updated parameters.
4886
4887        - **var** (Tensor) - The same shape and data type as `var`.
4888        - **accum** (Tensor) - The same shape and data type as `accum`.
4889
4890    Raises:
4891        TypeError: If `use_locking` is not a bool.
4892        TypeError: If dtype of `var`, `lr`, `l1` or `l2` is neither float16 nor float32.
4893        TypeError: If `lr`, `l1` or `l2` is neither a Number nor a Tensor.
4894        TypeError: If `grad` is not a Tensor.
4895        TypeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
4896
4897    Supported Platforms:
4898        ``Ascend`` ``GPU`` ``CPU``
4899
4900    Examples:
4901        >>> import numpy as np
4902        >>> from mindspore import Tensor, nn, ops, Parameter
4903        >>> class Net(nn.Cell):
4904        ...     def __init__(self):
4905        ...         super(Net, self).__init__()
4906        ...         self.apply_proximal_adagrad = ops.ApplyProximalAdagrad()
4907        ...         self.var = Parameter(Tensor(np.array([[0.6, 0.4],
4908        ...                                               [0.1, 0.5]]).astype(np.float32)), name="var")
4909        ...         self.accum = Parameter(Tensor(np.array([[0.6, 0.5],
4910        ...                                                 [0.2, 0.6]]).astype(np.float32)), name="accum")
4911        ...         self.lr = 0.01
4912        ...         self.l1 = 0.0
4913        ...         self.l2 = 0.0
4914        ...     def construct(self, grad):
4915        ...         out = self.apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1, self.l2, grad)
4916        ...         return out
4917        ...
4918        >>> net = Net()
4919        >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
4920        >>> output = net(grad)
4921        >>> print(output)
4922        (Tensor(shape=[2, 2], dtype=Float32, value=
4923        [[ 5.96388459e-01,  3.92964751e-01],
4924         [ 9.78178233e-02,  4.92815793e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
4925        [[ 6.90000057e-01,  9.90000010e-01],
4926         [ 2.10000008e-01,  1.24000001e+00]]))
4927    """
4928
4929    __mindspore_signature__ = (
4930        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4931        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
4932        sig.make_sig('lr', dtype=sig.sig_dtype.T1),
4933        sig.make_sig('l1', dtype=sig.sig_dtype.T2),
4934        sig.make_sig('l2', dtype=sig.sig_dtype.T3),
4935        sig.make_sig('grad', dtype=sig.sig_dtype.T)
4936    )
4937
4938    @prim_attr_register
4939    def __init__(self, use_locking=False):
4940        """Initialize ApplyProximalAdagrad."""
4941        self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad'],
4942                                outputs=['var', 'accum'])
4943        self.add_prim_attr('side_effect_mem', True)
4944        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
4945
4946
4947class SparseApplyProximalAdagrad(Primitive):
4948    r"""
4949    Updates relevant entries according to the proximal adagrad algorithm.
4950    Compared with :class:`mindspore.ops.ApplyProximalAdagrad`,
4951    an additional index tensor is input.
4952
4953    .. math::
4954        \begin{array}{ll} \\
4955            accum += grad * grad \\
4956            \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}} \\
4957            var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
4958        \end{array}
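
    As a plain NumPy sketch (an illustration reusing the values from the example below,
    not this operator's kernel), the row-wise sparse update can be written as::

        import numpy as np
        # values match the Examples section of this docstring
        var = np.array([[4.1, 7.2], [1.1, 3.0]], np.float32)
        accum = np.zeros_like(var)
        grad = np.ones_like(var)
        indices = np.array([0, 1], np.int32)
        lr, l1, l2 = 1.0, 1.0, 0.0
        # only the rows selected by `indices` are updated
        for i, idx in enumerate(indices):
            accum[idx] += grad[i] * grad[i]
            prox_v = var[idx] - lr * grad[i] / np.sqrt(accum[idx])
            var[idx] = (np.sign(prox_v) / (1 + lr * l2)
                        * np.maximum(np.abs(prox_v) - lr * l1, 0))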
4959
4960    Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
4961    to make the data types consistent.
4962    If they have different data types, the lower priority data type will be converted to
4963    the relatively highest priority data type.
4964
4965    Args:
4966        use_locking (bool): If ``True`` , the updates of the `var` and `accum` tensors will be protected by a lock.
4967            Default: ``False`` .
4968
4969    Inputs:
4970        - **var** (Parameter) - Variable tensor to be updated. The data type must be float16 or float32.
4971          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4972        - **accum** (Parameter) - Variable tensor to be updated, has the same shape as `var`.
4973        - **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
4974          a scalar tensor with float16 or float32 data type. It must be positive.
4975        - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a float number or
4976          a scalar tensor with float16 or float32 data type. It must be non-negative.
4977        - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a float number or
4978          a scalar tensor with float16 or float32 data type. It must be non-negative.
4979        - **grad** (Tensor) - A tensor for gradient, which must satisfy
4980          :math:`grad.shape[1:] = var.shape[1:]` if the rank of `var` is greater than 1.
4981        - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
4982          If there are duplicates in `indices`, the behavior is undefined. Must be of type int32 or int64,
4983          and :math:`indices.shape[0] = grad.shape[0]`.
4984
4985    Outputs:
4986        Tuple of 2 tensors, the updated parameters.
4987
4988        - **var** (Tensor) - The same shape and data type as `var`.
4989        - **accum** (Tensor) - The same shape and data type as `accum`.
4990
4991    Raises:
4992        TypeError: If `use_locking` is not a bool.
4993        TypeError: If dtype of `var`, `accum`, `lr`, `l1`, `l2` or `grad` is neither float16 nor float32.
4994        TypeError: If dtype of `indices` is neither int32 nor int64.
4995        ValueError: If `lr` <= 0 or `l1` < 0 or `l2` < 0.
4996        RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
4997
4998    Supported Platforms:
4999        ``Ascend`` ``GPU``
5000
5001    Examples:
5002        >>> import numpy as np
5003        >>> from mindspore import Tensor, nn, ops, Parameter
5004        >>> class Net(nn.Cell):
5005        ...     def __init__(self):
5006        ...         super(Net, self).__init__()
5007        ...         self.sparse_apply_proximal_adagrad = ops.SparseApplyProximalAdagrad()
5008        ...         self.var = Parameter(Tensor(np.array([[4.1, 7.2], [1.1, 3.0]], np.float32)), name="var")
5009        ...         self.accum = Parameter(Tensor(np.array([[0, 0], [0, 0]], np.float32)), name="accum")
5010        ...         self.lr = 1.0
5011        ...         self.l1 = 1.0
5012        ...         self.l2 = 0.0
5013        ...     def construct(self, grad, indices):
5014        ...         out = self.sparse_apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1,
5015        ...                                                  self.l2, grad, indices)
5016        ...         return out
5017        ...
5018        >>> net = Net()
5019        >>> grad = Tensor(np.array([[1, 1], [1, 1]], np.float32))
5020        >>> indices = Tensor(np.array([0, 1], np.int32))
5021        >>> output = net(grad, indices)
5022        >>> print(output)
5023        (Tensor(shape=[2, 2], dtype=Float32, value=
5024        [[ 2.09999990e+00,  5.19999981e+00],
5025         [ 0.00000000e+00,  1.00000000e+00]]), Tensor(shape=[2, 2], dtype=Float32, value=
5026        [[ 1.00000000e+00,  1.00000000e+00],
5027         [ 1.00000000e+00,  1.00000000e+00]]))
5028    """
5029
5030    __mindspore_signature__ = (
5031        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5032        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5033        sig.make_sig('lr', dtype=sig.sig_dtype.T1),
5034        sig.make_sig('l1', dtype=sig.sig_dtype.T2),
5035        sig.make_sig('l2', dtype=sig.sig_dtype.T3),
5036        sig.make_sig('grad', dtype=sig.sig_dtype.T),
5037        sig.make_sig('indices', dtype=sig.sig_dtype.T4)
5038    )
5039
5040    @prim_attr_register
5041    def __init__(self, use_locking=False):
5042        """Initialize SparseApplyProximalAdagrad."""
5043        self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad', 'indices'],
5044                                outputs=['var', 'accum'])
5045        self.add_prim_attr('side_effect_mem', True)
5046        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
5047
5048
5049class ApplyAddSign(Primitive):
5050    r"""
5051    Updates relevant entries according to the AddSign algorithm.
5052
5053    .. math::
5054        \begin{array}{ll} \\
5055            m_{t+1} = \beta * m_{t} + (1 - \beta) * g \\
5056            \text{update} = (\alpha + \text{sign_decay} * sign(g) * sign(m)) * g \\
5057            var = var - lr_{t+1} * \text{update}
5058        \end{array}
5059
5060    :math:`t` represents updating step while :math:`m` represents the 1st moment vector, :math:`m_{t}`
5061    is the last moment of :math:`m_{t+1}`, :math:`lr` represents scaling factor `lr`, :math:`g` represents `grad`,
5062    :math:`\alpha` represents `alpha`, :math:`\beta` represents `beta`.
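
    As a plain NumPy sketch (an illustration reusing the values from the example below,
    not this operator's kernel), the update above can be written as::

        import numpy as np
        # values match the Examples section of this docstring
        var = np.array([[0.6, 0.4], [0.1, 0.5]], np.float32)
        m = np.array([[0.6, 0.5], [0.2, 0.6]], np.float32)
        grad = np.array([[0.3, 0.7], [0.1, 0.8]], np.float32)
        lr, alpha, sign_decay, beta = 0.001, 1.0, 0.99, 0.9
        # update rule from the formula above
        m = beta * m + (1 - beta) * grad
        update = (alpha + sign_decay * np.sign(grad) * np.sign(m)) * grad
        var = var - lr * update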
5063
5064    The data type of all inputs must be float16 or float32 on Ascend and float16, float32 or float64 on CPU and GPU.
5065
5066    Inputs of `var`, `m`, `grad`, `sign_decay` and `beta` comply with the implicit type conversion rules
5067    to make the data types consistent.
5068    If they have different data types, the lower priority data type will be converted to
5069    the relatively highest priority data type.
5070
5071    Inputs:
5072        - **var** (Parameter) - Variable tensor to be updated.
5073          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
5074        - **m** (Parameter) - Variable tensor to be updated, has the same data type as `var`.
5075        - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar.
5076        - **alpha** (Union[Number, Tensor]) - Must be a scalar.
5077        - **sign_decay** (Union[Number, Tensor]) - Must be a scalar.
5078        - **beta** (Union[Number, Tensor]) - The exponential decay rate, must be a scalar.
5079        - **grad** (Tensor) - A tensor of the same shape as `var`, for the gradient.
5080
5081    Outputs:
5082        Tuple of 2 Tensors, the updated parameters.
5083
5084        - **var** (Tensor) - The same shape and data type as `var`.
5085        - **m** (Tensor) - The same shape and data type as `m`.
5086
5087    Raises:
5088        TypeError: If dtype of `var`, `lr` or `alpha` is not float16, float32 or float64.
5089        TypeError: If dtype of `sign_decay` or `beta` is not float16, float32 or float64.
5090        TypeError: If `lr`, `alpha` or `sign_decay` is neither a Number nor a Tensor.
5091        TypeError: If `grad` is not a Tensor.
5092        TypeError: If the data type of `var`, `m` and `grad` conversion of Parameter is not supported.
5093
5094    Supported Platforms:
5095        ``Ascend`` ``GPU`` ``CPU``
5096
5097    Examples:
5098        >>> import numpy as np
5099        >>> from mindspore import Tensor, nn, ops, Parameter
5100        >>> class Net(nn.Cell):
5101        ...     def __init__(self):
5102        ...         super(Net, self).__init__()
5103        ...         self.apply_add_sign = ops.ApplyAddSign()
5104        ...         self.var = Parameter(Tensor(np.array([[0.6, 0.4],
5105        ...                                               [0.1, 0.5]]).astype(np.float32)), name="var")
5106        ...         self.m = Parameter(Tensor(np.array([[0.6, 0.5],
5107        ...                                             [0.2, 0.6]]).astype(np.float32)), name="m")
5108        ...         self.lr = 0.001
5109        ...         self.alpha = 1.0
5110        ...         self.sign_decay = 0.99
5111        ...         self.beta = 0.9
5112        ...     def construct(self, grad):
5113        ...         out = self.apply_add_sign(self.var, self.m, self.lr, self.alpha, self.sign_decay, self.beta, grad)
5114        ...         return out
5115        ...
5116        >>> net = Net()
5117        >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
5118        >>> output = net(grad)
5119        >>> print(output)
5120        (Tensor(shape=[2, 2], dtype=Float32, value=
5121        [[ 5.99403024e-01,  3.98607016e-01],
5122         [ 9.98010039e-02,  4.98407990e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
5123        [[ 5.70000052e-01,  5.19999981e-01],
5124         [ 1.89999998e-01,  6.20000064e-01]]))
5125    """
5126
5127    __mindspore_signature__ = (
5128        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5129        sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5130        sig.make_sig('lr', dtype=sig.sig_dtype.T1),
5131        sig.make_sig('alpha', dtype=sig.sig_dtype.T2),
5132        sig.make_sig('sign_decay', dtype=sig.sig_dtype.T3),
5133        sig.make_sig('beta', dtype=sig.sig_dtype.T3),
5134        sig.make_sig('grad', dtype=sig.sig_dtype.T)
5135    )
5136
5137    @prim_attr_register
5138    def __init__(self):
5139        """Initialize ApplyAddSign."""
5140        self.add_prim_attr('side_effect_mem', True)
5141
5142
5143class ApplyPowerSign(Primitive):
5144    r"""
5145    Updates relevant entries according to the PowerSign algorithm.
5146
5147    The PowerSign algorithm was proposed in `Neural Optimizer Search with Reinforcement Learning
5148    <https://arxiv.org/abs/1709.07417>`_.
5149
5150    .. math::
5151        \begin{array}{ll} \\
5152            m_{t+1} = \beta * m_{t} + (1 - \beta) * g \\
5153            \text{update} = \exp(\text{logbase} * \text{sign_decay} * sign(g) * sign(m)) * g \\
5154            var = var - lr_{t+1} * \text{update}
5155        \end{array}
5156
5157    :math:`t` represents updating step while :math:`m` represents the 1st moment vector, :math:`m_{t}`
5158    is the last moment of :math:`m_{t+1}`, :math:`lr` represents scaling factor `lr`, :math:`g` represents `grad`,
5159    :math:`\beta` represents `beta`.
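
    As a plain NumPy sketch (an illustration reusing the values from the example below,
    not this operator's kernel), the update above can be written as::

        import numpy as np
        # values match the Examples section of this docstring
        var = np.array([[0.6, 0.4], [0.1, 0.5]], np.float32)
        m = np.array([[0.6, 0.5], [0.2, 0.6]], np.float32)
        grad = np.array([[0.3, 0.7], [0.1, 0.8]], np.float32)
        lr, logbase, sign_decay, beta = 0.001, np.e, 0.99, 0.9
        # update rule from the formula above
        m = beta * m + (1 - beta) * grad
        update = np.exp(logbase * sign_decay * np.sign(grad) * np.sign(m)) * grad
        var = var - lr * update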
5160
5161    All inputs comply with the implicit type conversion rules to make the data types consistent.
5162    If `lr`, `logbase`, `sign_decay` or `beta` is a number, the number is automatically converted to Tensor,
5163    and the data type is consistent with the Tensor data type involved in the operation.
5164    If inputs are tensors and have different data types, the lower priority data type will be converted to
5165    the relatively highest priority data type.
5166
5167    Note:
5168        On Ascend, input data type of float64 is currently not supported.
5169
5170    Inputs:
5171        - **var** (Parameter) - Variable tensor to be updated. With float64, float32 or float16 data type.
5172          If data type of `var` is float16, all inputs must have the same data type as `var`.
5173          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
5174        - **m** (Parameter) - Variable tensor to be updated, has the same shape as `var`.
5175        - **lr** (Union[Number, Tensor]) - The learning rate value, should be a scalar or Tensor
5176          with float64, float32 or float16 data type.
5177        - **logbase** (Union[Number, Tensor]) - Should be a scalar or Tensor with float64, float32 or float16 data type.
5178        - **sign_decay** (Union[Number, Tensor]) - Should be a scalar or Tensor with float64, float32 or
5179          float16 data type.
5180        - **beta** (Union[Number, Tensor]) - The exponential decay rate, should be a scalar or Tensor
5181          with float64, float32 or float16 data type.
5182        - **grad** (Tensor) - A tensor of the same shape as `var`, for the gradient.
5183
5184    Outputs:
5185        Tuple of 2 Tensors, the updated parameters.
5186
5187        - **var** (Tensor) - The same shape and data type as `var`.
5188        - **m** (Tensor) - The same shape and data type as `m`.
5189
5190    Raises:
5191        TypeError: If dtype of `var`, `lr`, `logbase`, `sign_decay`, `beta` or `grad` is not one of float16,
5192            float32 or float64.
5193        TypeError: If `lr`, `logbase`, `sign_decay` or `beta` is neither a Number nor a Tensor.
5194        TypeError: If `grad` is not a Tensor.
5195        TypeError: If the data type of `lr`, `logbase`, `sign_decay` and `grad` conversion of Parameter
5196                      is not supported.
5197
5198    Supported Platforms:
5199        ``Ascend`` ``GPU`` ``CPU``
5200
5201    Examples:
5202        >>> import numpy as np
5203        >>> from mindspore import Tensor, nn, ops, Parameter
5204        >>> class Net(nn.Cell):
5205        ...     def __init__(self):
5206        ...         super(Net, self).__init__()
5207        ...         self.apply_power_sign = ops.ApplyPowerSign()
5208        ...         self.var = Parameter(Tensor(np.array([[0.6, 0.4],
5209        ...                                               [0.1, 0.5]]).astype(np.float32)), name="var")
5210        ...         self.m = Parameter(Tensor(np.array([[0.6, 0.5],
5211        ...                                             [0.2, 0.6]]).astype(np.float32)), name="m")
5212        ...         self.lr = 0.001
5213        ...         self.logbase = np.e
5214        ...         self.sign_decay = 0.99
5215        ...         self.beta = 0.9
5216        ...     def construct(self, grad):
5217        ...         out = self.apply_power_sign(self.var, self.m, self.lr, self.logbase,
5218        ...                                        self.sign_decay, self.beta, grad)
5219        ...         return out
5220        ...
5221        >>> net = Net()
5222        >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
5223        >>> output = net(grad)
5224        >>> print(output)
5225        (Tensor(shape=[2, 2], dtype=Float32, value=
5226        [[ 5.95575690e-01,  3.89676481e-01],
5227         [ 9.85252112e-02,  4.88201708e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
5228        [[ 5.70000052e-01,  5.19999981e-01],
5229         [ 1.89999998e-01,  6.20000064e-01]]))
5230    """
5231
5232    __mindspore_signature__ = (
5233        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5234        sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5235        sig.make_sig('lr', dtype=sig.sig_dtype.T),
5236        sig.make_sig('logbase', dtype=sig.sig_dtype.T),
5237        sig.make_sig('sign_decay', dtype=sig.sig_dtype.T),
5238        sig.make_sig('beta', dtype=sig.sig_dtype.T),
5239        sig.make_sig('grad', dtype=sig.sig_dtype.T)
5240    )
5241
5242    @prim_attr_register
5243    def __init__(self):
5244        """Initialize ApplyPowerSign."""
5245        self.add_prim_attr('side_effect_mem', True)
5246
5247
5248class ApplyGradientDescent(Primitive):
5249    r"""
5250    Updates `var` by subtracting `alpha` * `delta` from it.
5251
5252    .. math::
5253        var = var - \alpha * \delta
5254
5255    where :math:`\alpha` represents `alpha`, :math:`\delta` represents `delta`.
5256
5257    Inputs of `var` and `delta` comply with the implicit type conversion rules to make the data types consistent.
5258    If they have different data types, the lower priority data type will be converted to
5259    the relatively highest priority data type.
5260
5261    Inputs:
5262        - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
5263          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
5264        - **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
5265        - **delta** (Tensor) - A tensor for the change, has the same shape as `var`.
5266
5267    Outputs:
5268        Tensor, represents the updated `var`.
5269
5270    Raises:
5271        TypeError: If dtype of `var` or `alpha` is neither float16 nor float32.
5272        TypeError: If `delta` is not a Tensor.
5273        TypeError: If `alpha` is neither a Number nor a Tensor.
5274        TypeError: If the data type of `var` and `delta` conversion of Parameter is not supported.
5275
5276    Supported Platforms:
5277        ``Ascend`` ``GPU`` ``CPU``
5278
5279    Examples:
5280        >>> import numpy as np
5281        >>> from mindspore import Tensor, nn, ops, Parameter
5282        >>> class Net(nn.Cell):
5283        ...     def __init__(self):
5284        ...         super(Net, self).__init__()
5285        ...         self.apply_gradient_descent = ops.ApplyGradientDescent()
5286        ...         self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var")
5287        ...         self.alpha = 0.001
5288        ...     def construct(self, delta):
5289        ...         out = self.apply_gradient_descent(self.var, self.alpha, delta)
5290        ...         return out
5291        ...
5292        >>> net = Net()
5293        >>> delta = Tensor(np.array([[0.1, 0.1], [0.1, 0.1]]).astype(np.float32))
5294        >>> output = net(delta)
5295        >>> print(output)
5296        [[0.9999 0.9999]
5297         [0.9999 0.9999]]
5298    """
5299
5300    __mindspore_signature__ = (
5301        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5302        sig.make_sig('alpha', dtype=sig.sig_dtype.T1),
5303        sig.make_sig('delta', dtype=sig.sig_dtype.T)
5304    )
5305
5306    @prim_attr_register
5307    def __init__(self):
5308        """Initialize ApplyGradientDescent."""
5309        self.add_prim_attr('side_effect_mem', True)
5310
5311
5312class ApplyProximalGradientDescent(Primitive):
5313    r"""
5314    Updates relevant entries according to the FOBOS(Forward Backward Splitting) algorithm.
5315    Refer to the paper `Efficient Learning using Forward-Backward Splitting
5316    <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_ for more details.
5317
5318    .. math::
5319        \begin{array}{ll} \\
5320            \text{prox_v} = var - \alpha * \delta \\
5321            var = \frac{sign(\text{prox_v})}{1 + \alpha * l2} * \max(\left| \text{prox_v} \right| - \alpha * l1, 0)
5322        \end{array}
5323
5324    where :math:`\alpha` represents `alpha`, :math:`\delta` represents `delta`.
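
    As a plain NumPy sketch (an illustration reusing the values from the example below,
    not this operator's kernel), the update above can be written as::

        import numpy as np
        # values match the Examples section of this docstring
        var = np.ones([2, 2], np.float32)
        delta = np.full([2, 2], 0.1, np.float32)
        alpha, l1, l2 = 0.001, 0.1, 0.1
        # update rule from the formula above
        prox_v = var - alpha * delta
        var = np.sign(prox_v) / (1 + alpha * l2) * np.maximum(np.abs(prox_v) - alpha * l1, 0)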
5325
5326    Inputs of `var` and `delta` comply with the implicit type conversion rules to make the data types consistent.
5327    If they have different data types, the lower priority data type will be converted to
5328    the relatively highest priority data type.
5329
5330    Inputs:
5331        - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
5332          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
5333        - **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
5334        - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a scalar.
5335          With float32 or float16 data type.
5336        - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a scalar.
5337          With float32 or float16 data type.
5338        - **delta** (Tensor) - A tensor for the change.
5339
5340    Outputs:
5341        Tensor, represents the updated `var`.
5342
5343    Raises:
5344        TypeError: If dtype of `var`, `alpha`, `l1` or `l2` is neither float16 nor float32.
5345        TypeError: If `alpha`, `l1` or `l2` is neither a Number nor a Tensor.
5346        TypeError: If `delta` is not a Tensor.
5347        TypeError: If the data type of `var` and `delta` conversion of Parameter is not supported.
5348
5349    Supported Platforms:
5350        ``Ascend`` ``GPU`` ``CPU``
5351
5352    Examples:
5353        >>> import numpy as np
5354        >>> from mindspore import Tensor, nn, ops, Parameter
5355        >>> class Net(nn.Cell):
5356        ...     def __init__(self):
5357        ...         super(Net, self).__init__()
5358        ...         self.apply_proximal_gradient_descent = ops.ApplyProximalGradientDescent()
5359        ...         self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var")
5360        ...         self.alpha = 0.001
5361        ...         self.l1 = 0.1
5362        ...         self.l2 = 0.1
5363        ...     def construct(self, delta):
5364        ...         out = self.apply_proximal_gradient_descent(self.var, self.alpha, self.l1, self.l2, delta)
5365        ...         return out
5366        ...
5367        >>> net = Net()
5368        >>> delta = Tensor(np.array([[0.1, 0.1], [0.1, 0.1]]).astype(np.float32))
5369        >>> output = net(delta)
5370        >>> print(output)
5371        [[0.99969995 0.99969995]
5372         [0.99969995 0.99969995]]
5373    """
5374
5375    __mindspore_signature__ = (
5376        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5377        sig.make_sig('alpha', dtype=sig.sig_dtype.T1),
5378        sig.make_sig('l1', dtype=sig.sig_dtype.T2),
5379        sig.make_sig('l2', dtype=sig.sig_dtype.T3),
5380        sig.make_sig('delta', dtype=sig.sig_dtype.T)
5381    )
5382
5383    @prim_attr_register
5384    def __init__(self):
5385        """Initialize ApplyGradientDescent."""
5386        self.add_prim_attr('side_effect_mem', True)
5387
5388
5389class LARSUpdate(PrimitiveWithInfer):
5390    """
5391    Conducts a LARS (layer-wise adaptive rate scaling) update on the gradient.
5392
5393    For more details, please refer to :class:`mindspore.nn.LARS`.
5394
5395    Args:
5396        epsilon (float, optional): Term added to the denominator to improve numerical stability.
5397            Default: ``1e-05`` .
5398        hyperpara (float, optional): Trust coefficient for calculating the local learning rate.
5399            Default: ``0.001`` .
5400        use_clip (bool, optional): Whether to use clip operation for calculating the local learning rate.
5401            Default: ``False`` .
5402
5403    Inputs:
5404        - **weight** (Tensor) - A tensor, representing the weight.
5405          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
5406        - **gradient** (Tensor) - The gradient of weight, which has the same shape and dtype with weight.
5407        - **norm_weight** (Tensor) - A scalar tensor, representing the sum of squares of weight.
5408        - **norm_gradient** (Tensor) - A scalar tensor, representing the sum of squares of gradient.
5409        - **weight_decay** (Union[Number, Tensor]) - Weight decay. It must be a scalar tensor or number.
5410        - **learning_rate** (Union[Number, Tensor]) - Learning rate. It must be a scalar tensor or number.
5411
5412    Outputs:
5413        Tensor, represents the new gradient.
5414
5415    Raises:
5416        TypeError: If `epsilon` or `hyperpara` is not a float.
5417        TypeError: If `use_clip` is not a bool.
5418        TypeError: If `weight`, `gradient`, `norm_weight` or `norm_gradient` is not a Tensor.
5419        TypeError: If `weight_decay` or `learning_rate` is neither a Number nor a Tensor.
5420        TypeError: If shape of `gradient` is not the same as `weight`.
5421
5422    Supported Platforms:
5423        ``Ascend``
5424
5425    Examples:
5426        >>> import numpy as np
5427        >>> from mindspore import Tensor, nn, ops
5428        >>> class Net(nn.Cell):
5429        ...     def __init__(self):
5430        ...         super(Net, self).__init__()
5431        ...         self.lars = ops.LARSUpdate()
5432        ...         self.reduce = ops.ReduceSum()
5433        ...         self.square = ops.Square()
5434        ...     def construct(self, weight, gradient):
5435        ...         w_square_sum = self.reduce(self.square(weight))
5436        ...         grad_square_sum = self.reduce(self.square(gradient))
5437        ...         grad_t = self.lars(weight, gradient, w_square_sum, grad_square_sum, 0.0, 1.0)
5438        ...         return grad_t
5439        ...
5440        >>> weight = Tensor(np.array([[0.5, 0.8, 0.2], [0.6, 0.4, 0.2]]).astype(np.float32))
5441        >>> gradient = Tensor(np.array([[0.4, 0.4, 0.5], [0.2, 0.4, 0.3]]).astype(np.float32))
5442        >>> net = Net()
5443        >>> output = net(Tensor(weight), Tensor(gradient))
5444        >>> print(output)
5445        [[0.0005265  0.0005265 0.00065813]
5446         [0.00026325 0.0005265 0.00039488]]
5447    """
5448
5449    @prim_attr_register
5450    def __init__(self, epsilon=1e-05, hyperpara=0.001, use_clip=False):
5451        """Initialize LARSUpdate."""
5452        validator.check_value_type("epsilon", epsilon, [float], self.name)
5453        validator.check_value_type("hyperpara", hyperpara, [float], self.name)
5454        validator.check_value_type("use_clip", use_clip, [bool], self.name)
5455
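
# Illustrative sketch (hypothetical, not part of the operator above): it re-derives the
# LARSUpdate docstring example with NumPy, assuming the commonly used LARS scaling
# local_lr = hyperpara * ||w|| / (||g|| + weight_decay * ||w|| + epsilon) and
# output = learning_rate * local_lr * (gradient + weight_decay * weight). The device
# kernel may treat clipping and corner cases differently.
def _lars_update_sketch(weight, gradient, weight_decay=0.0, learning_rate=1.0,
                        hyperpara=0.001, epsilon=1e-05):
    """Hypothetical NumPy re-implementation of the documented LARS scaling."""
    import numpy as np
    w_norm = np.sqrt(np.sum(np.square(weight)))      # sqrt of the norm_weight input
    g_norm = np.sqrt(np.sum(np.square(gradient)))    # sqrt of the norm_gradient input
    local_lr = hyperpara * w_norm / (g_norm + weight_decay * w_norm + epsilon)
    return learning_rate * local_lr * (gradient + weight_decay * weight)
# With the weight/gradient arrays of the LARSUpdate example above (weight_decay=0.0,
# learning_rate=1.0), this returns values matching the printed output (~5.265e-4 ...).
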
5456
5457class ApplyFtrl(Primitive):
5458    """
5459    Updates relevant entries according to the FTRL scheme.
5460
5461    For more details, please refer to :class:`mindspore.nn.FTRL`.
5462
5463    Note:
5464        - Currently, only positive numbers are supported on the Ascend platform,
5465          and the calculation results for other scenarios are not defined.
5466        - Inputs of `var`, `accum`, `linear` and `grad` comply with the implicit type conversion rules
5467          to make the data types consistent.
5468          If they have different data types, the lower priority data type will be converted to
5469          the relatively highest priority data type.
5470
5471    Args:
5472        use_locking (bool): Use locks for updating operation if ``True`` . Default: ``False`` .
5473
5474    Inputs:
5475        - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
5476          The shape is :math:`(N, *)` where :math:`*` means any number of additional dimensions.
5477        - **accum** (Parameter) - The accumulation to be updated, must be same shape as `var`.
5478        - **linear** (Parameter) - The linear coefficient to be updated, must be same shape as `var`.
5479        - **grad** (Tensor) - Gradient. The data type must be float16 or float32.
5480        - **lr** (Union[Number, Tensor]) - The learning rate value, must be positive. Default: ``0.001`` .
5481          It must be a float number or a scalar tensor with float16 or float32 data type.
5482        - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be greater than or equal to zero.
5483          Default: ``0.0`` . It must be a float number or a scalar tensor with float16 or float32 data type.
5484        - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be greater than or equal to zero.
5485          Default: ``0.0`` . It must be a float number or a scalar tensor with float16 or float32 data type.
5486        - **lr_power** (Union[Number, Tensor]) - Learning rate power controls how the learning rate decreases
5487          during training, must be less than or equal to zero. Use fixed learning rate if lr_power is zero.
5488          Default: ``-0.5`` . It must be a float number or a scalar tensor with float16 or float32 data type.
5489
5490    Outputs:
5491        - **var** (Tensor) - Represents the updated `var`. As the input parameters have been updated in-place, this
5492          value is always zero when the platform is GPU.
5493
5494    Raises:
5495        TypeError: If `use_locking` is not a bool.
5496        TypeError: If dtype of `var`, `grad`, `lr`, `l1`, `l2` or `lr_power` is neither float16 nor float32.
5497        TypeError: If `lr`, `l1`, `l2` or `lr_power` is neither a Number nor a Tensor.
5498        TypeError: If `grad` is not a Tensor.
5499        TypeError: If the parameter types of `var`, `accum` and `linear` are inconsistent.
5500        TypeError: If the parameter types of `grad`, `lr`, `l1`, `l2`, `lr_power` are inconsistent with `var`
5501            and the precision is greater than `var`.
5502
5503    Supported Platforms:
5504        ``Ascend`` ``GPU`` ``CPU``
5505
5506    Examples:
5507        >>> import numpy as np
5508        >>> from mindspore import Tensor, nn, ops, Parameter
5509        >>> class ApplyFtrlNet(nn.Cell):
5510        ...     def __init__(self):
5511        ...         super(ApplyFtrlNet, self).__init__()
5512        ...         self.apply_ftrl = ops.ApplyFtrl()
5513        ...         self.lr = 0.001
5514        ...         self.l1 = 0.0
5515        ...         self.l2 = 0.0
5516        ...         self.lr_power = -0.5
5517        ...         self.var = Parameter(Tensor(np.array([[0.6, 0.4],
5518        ...                                               [0.1, 0.5]]).astype(np.float32)), name="var")
5519        ...         self.accum = Parameter(Tensor(np.array([[0.6, 0.5],
5520        ...                                                 [0.2, 0.6]]).astype(np.float32)), name="accum")
5521        ...         self.linear = Parameter(Tensor(np.array([[0.9, 0.1],
5522        ...                                                  [0.7, 0.8]]).astype(np.float32)), name="linear")
5523        ...
5524        ...     def construct(self, grad):
5525        ...         out = self.apply_ftrl(self.var, self.accum, self.linear, grad, self.lr, self.l1, self.l2,
5526        ...                               self.lr_power)
5527        ...         return out
5528        ...
5529        >>> net = ApplyFtrlNet()
5530        >>> input_x = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
5531        >>> output = net(input_x)
5532        >>> print(net.var.asnumpy())
5533        [[ 0.0390525  0.11492836]
5534         [ 0.00066425 0.15075898]]
5535    """
5536
5537    __mindspore_signature__ = (
5538        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5539        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5540        sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5541        sig.make_sig('grad', dtype=sig.sig_dtype.T),
5542        sig.make_sig('lr', dtype=sig.sig_dtype.T),
5543        sig.make_sig('l1', dtype=sig.sig_dtype.T),
5544        sig.make_sig('l2', dtype=sig.sig_dtype.T),
5545        sig.make_sig('lr_power', dtype=sig.sig_dtype.T)
5546    )
5547
5548    @prim_attr_register
5549    def __init__(self, use_locking=False):
5550        """Initialize ApplyFtrl."""
5551        self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'lr', 'l1', 'l2', 'lr_power'],
5552                                outputs=['output'])
5553        self.add_prim_attr('side_effect_mem', True)
5554        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
5555
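
# Illustrative sketch (hypothetical, not part of the operator above): a NumPy version of
# the standard FTRL-proximal step, written only to make the ApplyFtrl state transition
# concrete. It reproduces the first element of the docstring example above; the device
# kernels may differ for edge cases and unsupported value ranges.
def _ftrl_step_sketch(var, accum, linear, grad, lr=0.001, l1=0.0, l2=0.0,
                      lr_power=-0.5):
    """Hypothetical single FTRL-proximal update on NumPy arrays."""
    import numpy as np
    accum_new = accum + np.square(grad)
    sigma = (np.power(accum_new, -lr_power) - np.power(accum, -lr_power)) / lr
    linear_new = linear + grad - sigma * var
    quadratic = np.power(accum_new, -lr_power) / lr + 2.0 * l2
    var_new = np.where(np.abs(linear_new) > l1,
                       -(linear_new - np.sign(linear_new) * l1) / quadratic,
                       np.zeros_like(var))
    return var_new, accum_new, linear_new
# For var=0.6, accum=0.6, linear=0.9 and grad=0.3 (the [0, 0] entries in the example
# above) this yields var_new ~= 0.039, matching the printed net.var value.
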
5556
5557class SparseApplyFtrl(Primitive):
5558    """
5559    Updates relevant entries according to the FTRL-proximal scheme.
5560    For more details, please refer to :class:`mindspore.nn.FTRL`.
5561
5562    All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
5563    If they have different data types, the lower priority data type will be converted to
5564    the relatively highest priority data type.
5565
5566    Args:
5567        lr (float): The learning rate value, must be positive.
5568        l1 (float): l1 regularization strength, must be greater than or equal to zero.
5569        l2 (float): l2 regularization strength, must be greater than or equal to zero.
5570        lr_power (float): Learning rate power controls how the learning rate decreases during training,
5571            must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
5572        use_locking (bool, optional): Use locks for updating operation if ``True`` . Default: ``False`` .
5573
5574    Inputs:
5575        - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
5576          The shape is :math:`(N, *)` where :math:`*` means any number of additional dimensions.
5577        - **accum** (Parameter) - The accumulation to be updated, must be same shape as `var`.
5578        - **linear** (Parameter) - The linear coefficient to be updated, must be the same shape as `var`.
5579        - **grad** (Tensor) - A tensor that must satisfy :math:`grad.shape[1:] = var.shape[1:]`
5580          if the rank of `var` is greater than 1.
5581        - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
5582          If there are duplicates in `indices`, the behavior is undefined.
5583          The type must be int32 or int64 and :math:`indices.shape[0] = grad.shape[0]`.
5584
5585    Outputs:
5586        - **var** (Tensor) - Tensor, has the same shape and data type as `var`.
5587        - **accum** (Tensor) - Tensor, has the same shape and data type as `accum`.
5588        - **linear** (Tensor) - Tensor, has the same shape and data type as `linear`.
5589
5590    Raises:
5591        TypeError: If `lr`, `l1`, `l2` or `lr_power` is not a float.
5592        TypeError: If `use_locking` is not a bool.
5593        TypeError: If dtype of `var`, `accum`, `linear` or `grad` is neither float16 nor float32.
5594        TypeError: If dtype of `indices` is neither int32 nor int64.
5595        RuntimeError: If implicit type conversion between all of the inputs except `indices` is not supported.
5596
5597    Supported Platforms:
5598        ``Ascend`` ``GPU`` ``CPU``
5599
5600    Examples:
5601        >>> import mindspore
5602        >>> import numpy as np
5603        >>> from mindspore import Tensor, nn, Parameter, ops
5604        >>> class SparseApplyFtrlNet(nn.Cell):
5605        ...     def __init__(self):
5606        ...         super(SparseApplyFtrlNet, self).__init__()
5607        ...         self.sparse_apply_ftrl = ops.SparseApplyFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
5608        ...         self.var = Parameter(Tensor(np.array([[0.2]]).astype(np.float32)), name="var")
5609        ...         self.accum = Parameter(Tensor(np.array([[0.1]]).astype(np.float32)), name="accum")
5610        ...         self.linear = Parameter(Tensor(np.array([[0.6]]).astype(np.float32)), name="linear")
5611        ...
5612        ...     def construct(self, grad, indices):
5613        ...         out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)
5614        ...         return out
5615        ...
5616        >>> net = SparseApplyFtrlNet()
5617        >>> grad = Tensor(np.array([[0.7]]).astype(np.float32))
5618        >>> indices = Tensor(np.ones([1]), mindspore.int32)
5619        >>> output = net(grad, indices)
5620        >>> print(output)
5621        (Tensor(shape=[1, 1], dtype=Float32, value=
5622        [[2.00000003e-01]]), Tensor(shape=[1, 1], dtype=Float32, value=
5623        [[1.00000001e-01]]), Tensor(shape=[1, 1], dtype=Float32, value=
5624        [[6.00000024e-01]]))
5625    """
5626
5627    __mindspore_signature__ = (
5628        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5629        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5630        sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5631        sig.make_sig('grad', dtype=sig.sig_dtype.T),
5632        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
5633    )
5634
5635    @prim_attr_register
5636    def __init__(self, lr, l1, l2, lr_power, use_locking=False):
5637        """Initialize SparseApplyFtrl."""
5638        validator.check_value_type("lr", lr, [float], self.name)
5639        validator.check_value_type("l1", l1, [float], self.name)
5640        validator.check_value_type("l2", l2, [float], self.name)
5641        validator.check_value_type("lr_power", lr_power, [float], self.name)
5642        self.lr = validator.check_positive_float(lr, "lr", self.name)
5643        self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
5644        self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
5645        self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name)
5646        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
5647        self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'indices'],
5648                                outputs=['var', 'accum', 'linear'])
5649        self.add_prim_attr('side_effect_mem', True)
5650
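
# Illustrative sketch (hypothetical, not part of the operator above): the sparse variant
# applies the same FTRL-proximal step, but only to the rows of `var`, `accum` and
# `linear` selected by `indices`; all other rows stay untouched. It reuses the
# hypothetical _ftrl_step_sketch helper defined above and does not model duplicate or
# out-of-range indices (the docstring above leaves duplicates undefined).
def _sparse_ftrl_step_sketch(var, accum, linear, grad, indices,
                             lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5):
    """Hypothetical row-wise FTRL-proximal update driven by `indices`."""
    for row, idx in enumerate(indices):
        var[idx], accum[idx], linear[idx] = _ftrl_step_sketch(
            var[idx], accum[idx], linear[idx], grad[row],
            lr=lr, l1=l1, l2=l2, lr_power=lr_power)
    return var, accum, linear
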
5651
5652class SparseApplyFtrlV2(PrimitiveWithInfer):
5653    """
5654    The SparseApplyFtrlV2 interface is deprecated, please use the :class:`mindspore.ops.SparseApplyFtrl` instead.
5655
5656    Supported Platforms:
5657        Deprecated
5658    """
5659
5660    __mindspore_signature__ = (
5661        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5662        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5663        sig.make_sig('linear', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
5664        sig.make_sig('grad', dtype=sig.sig_dtype.T),
5665        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
5666    )
5667
5668    @deprecated("2.1", "ops.SparseApplyFtrl", False)
5669    @prim_attr_register
5670    def __init__(self, lr, l1, l2, l2_shrinkage, lr_power, use_locking=False):
5671        """Initialize SparseApplyFtrlV2."""
5672        validator.check_value_type("lr", lr, [float], self.name)
5673        validator.check_value_type("l1", l1, [float], self.name)
5674        validator.check_value_type("l2", l2, [float], self.name)
5675        validator.check_value_type("lr_power", lr_power, [float], self.name)
5676        self.lr = validator.check_positive_float(lr, "lr", self.name)
5677        self.l1 = validator.check_non_negative_float(l1, "l1", self.name)
5678        self.l2 = validator.check_non_negative_float(l2, "l2", self.name)
5679        self.lr_power = validator.check_number("lr_power", lr_power, 0, validator.LE, self.name)
5680        self.l2_shrinkage = validator.check_value_type("l2_shrinkage", l2_shrinkage, [float], self.name)
5681        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
5682        self.add_prim_attr('side_effect_mem', True)
5683
5684    def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape):
5685        validator.check('var shape', var_shape, 'accum shape', accum_shape, validator.EQ, self.name)
5686        validator.check('var shape', var_shape, 'linear shape', linear_shape, validator.EQ, self.name)
5687        if len(var_shape) > 1:
5688            validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], validator.EQ, self.name)
5689        validator.check_int(len(indices_shape), 1, validator.EQ, "indices rank", self.name)
5690        validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], validator.EQ, self.name)
5691        return var_shape, accum_shape, linear_shape
5692
5693    def infer_dtype(self, var_dtype, accum_dtype, linear_dtype, grad_dtype, indices_dtype):
5694        args = {"var_dtype": var_dtype, "accum_dtype": accum_dtype,
5695                "linear_dtype": linear_dtype, "grad_dtype": grad_dtype}
5696        validator.check_tensors_dtypes_same_and_valid(args, [mstype.float16, mstype.float32], self.name)
5697        validator.check_tensor_dtype_valid("indices", indices_dtype, [mstype.int32], self.name)
5698        return var_dtype, accum_dtype, linear_dtype
5699
5700
5701class Dropout2D(PrimitiveWithInfer):
5702    r"""
5703    During training, randomly zeroes some channels of the input tensor with probability :math:`1-keep\_prob`
5704    from a Bernoulli distribution (for a 4-dimensional tensor with a shape of :math:`(N, C, H, W)`,
5705    the channel feature map refers
5706    to a 2-dimensional feature map with the shape of :math:`(H, W)`).
5707
5708    Dropout2D can improve the independence between channel feature maps.
5709
5710    Note:
5711        The keep probability :math:`keep\_prob` is equal to :math:`1 - p` in :func:`mindspore.ops.dropout2d`.
5712
5713    Args:
5714        keep_prob (float, optional): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8
5715            means dropping out 20% of channels. Default: ``0.5`` .
5716
5717    Inputs:
5718        - **x** (Tensor) - A 4-D tensor with shape :math:`(N, C, H, W)`, where N is the batch size, C is the number
5719          of channels, H is the feature height, and W is the feature width.
5720
5721    Outputs:
5722        - **output** (Tensor) - With the same shape and data type as `x`.
5723        - **mask** (Tensor) - With the same shape as `x` and the data type is bool.
5724
5725    Raises:
5726        TypeError: If `x` is not a Tensor.
5727        TypeError: If the data type of `keep_prob` is not float.
5728        ValueError: If `keep_prob` is out of the range `[0.0, 1.0]`.
5729        ValueError: If `x` shape is not `4D`.
5730
5731    Supported Platforms:
5732        ``Ascend`` ``GPU`` ``CPU``
5733
5734    Examples:
5735        >>> import mindspore
5736        >>> import numpy as np
5737        >>> from mindspore import Tensor, ops
5738        >>> dropout = ops.Dropout2D(keep_prob=0.5)
5739        >>> x = Tensor(np.ones([2, 1, 2, 3]), mindspore.float32)
5740        >>> output, mask = dropout(x)
5741        >>> print(output.shape)
5742        (2, 1, 2, 3)
5743    """
5744
5745    @prim_attr_register
5746    def __init__(self, keep_prob=0.5):
5747        """Initialize Dropout2D."""
5748        super().__init__("Dropout2D")
5749        self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
5750        self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name)
5751
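
# Illustrative sanity check (hypothetical, not part of the operator above): Dropout2D
# zeroes whole (H, W) feature maps, so for an all-ones input every channel of the
# output is constant -- either dropped to zeros or kept (and possibly rescaled).
def _dropout2d_channel_check():
    """Hypothetical check of the per-channel dropout behaviour."""
    import numpy as np
    import mindspore
    from mindspore import Tensor, ops
    x = Tensor(np.ones([2, 4, 2, 3]), mindspore.float32)
    output, mask = ops.Dropout2D(keep_prob=0.5)(x)
    per_channel = output.asnumpy().reshape(2, 4, -1)
    # each (H, W) map holds a single value, so max == min within every channel
    assert np.all(per_channel.max(axis=-1) == per_channel.min(axis=-1))
    return output.shape, mask.shape
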
5752
5753class Dropout3D(PrimitiveWithInfer):
5754    r"""
5755    During training, randomly zeroes some channels of the input tensor
5756    with probability :math:`1-keep\_prob` from a Bernoulli distribution (for a 5-dimensional
5757    tensor with a shape of NCDHW,
5758    the channel feature map refers to a 3-dimensional feature map with a shape of DHW).
5759
5760    Note:
5761        The keep probability :math:`keep\_prob` is equal to :math:`1 - p` in :func:`mindspore.ops.dropout3d`.
5762
5763    Dropout3D can improve the independence between channel feature maps.
5764
5765    Args:
5766        keep_prob (float): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8
5767            means dropping out 20% of channels. Default: ``0.5`` .
5768
5769    Inputs:
5770        - **x** (Tensor) - A 5-D tensor with shape :math:`(N, C, D, H, W)`, where N is the batch size, C is the number
5771          of channels, D is the feature depth, H is the feature height, and W is the feature width.
5772
5773    Outputs:
5774        - **output** (Tensor) - With the same shape and data type as `x`.
5775        - **mask** (Tensor) - With the same shape as `x` and the data type is bool.
5776
5777    Raises:
5778        TypeError: If the data type of `keep_prob` is not float.
5779        ValueError: If `keep_prob` is out of the range [0.0, 1.0],
5780                    or if the dimension of the input tensor is not 5.
5781
5782    Supported Platforms:
5783        ``Ascend`` ``GPU`` ``CPU``
5784
5785    Examples:
5786        >>> import mindspore
5787        >>> import numpy as np
5788        >>> from mindspore import Tensor, ops
5789        >>> dropout = ops.Dropout3D(keep_prob=0.5)
5790        >>> x = Tensor(np.ones([2, 1, 2, 1, 2]), mindspore.float32)
5791        >>> output, mask = dropout(x)
5792        >>> print(output.shape)
5793        (2, 1, 2, 1, 2)
5794    """
5795
5796    @prim_attr_register
5797    def __init__(self, keep_prob=0.5):
5798        """Initialize Dropout3D."""
5799        super().__init__("Dropout3D")
5800        self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
5801        self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name)
5802
5803
5804class CTCLoss(Primitive):
5805    r"""
5806    Calculates the CTC (Connectionist Temporal Classification) loss and the gradient.
5807
5808    The underlying implementation of this interface comes from the third-party baidu-research::warp-ctc library.
5809    The CTC algorithm is proposed in `Connectionist Temporal Classification: Labeling Unsegmented Sequence Data with
5810    Recurrent Neural Networks <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_.
5811
5812    CTCLoss calculates loss between a continuous time series and a target sequence.
5813    CTCLoss sums over the probability of input to target, producing a loss value which is differentiable with
5814    respect to each input node. The alignment of input to target is assumed to be “many-to-one”,
5815    such that the length of the target sequence must be less than or equal to the length of the input.
5816
5817    Args:
5818        preprocess_collapse_repeated (bool): If ``True`` , repeated labels will be collapsed prior to the CTC
5819                                             calculation. Default: ``False`` .
5820        ctc_merge_repeated (bool): If ``False`` , during CTC calculation, repeated non-blank labels will not be merged
5821                                   and these labels will be interpreted as individual ones. This is a simplified
5822                                   version of CTC. Default: ``True`` .
5823        ignore_longer_outputs_than_inputs (bool): If ``True`` , sequences with longer outputs than inputs will be
5824                                                  ignored. Default: ``False`` .
5825
5826    Inputs:
5827        - **x** (Tensor) - The input Tensor must be a `3-D` tensor whose shape is
5828          :math:`(max\_time, batch\_size, num\_classes)`. `num_classes` must be `num_labels + 1` classes, `num_labels`
5829          indicates the number of actual labels. Blank labels are reserved. Default blank label is `num_classes - 1`.
5830          Data type must be float16, float32 or float64.
5831        - **labels_indices** (Tensor) - The indices of labels. `labels_indices[i, :] = [b, t]` means
5832          `labels_values[i]` stores the id for `(batch b, time t)`. The type must be int64 and rank must be 2.
5833        - **labels_values** (Tensor) - A `1-D` input tensor. The values are associated with the given batch and time.
5834          The type must be int32. `labels_values[i]` must be in the range of `[0, num_classes)`.
5835        - **sequence_length** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch\_size, )`.
5836          The type must be int32. Each value in the tensor must not be greater than `max_time`.
5837
5838    Outputs:
5839        - **loss** (Tensor) - A tensor containing log-probabilities, the shape is :math:`(batch\_size, )`.
5840          The tensor has the same data type as `x`.
5841        - **gradient** (Tensor) - The gradient of `loss`, has the same shape and data type as `x`.
5842
5843    Raises:
5844        TypeError: If `preprocess_collapse_repeated`, `ctc_merge_repeated` or `ignore_longer_outputs_than_inputs`
5845                   is not a bool.
5846        TypeError: If `x`, `labels_indices`, `labels_values` or `sequence_length` is not a Tensor.
5847        ValueError: If rank of `labels_indices` is not equal to 2.
5848        TypeError: If dtype of `x` is not one of the following: float16, float32 or float64.
5849        TypeError: If dtype of `labels_indices` is not int64.
5850        TypeError: If dtype of `labels_values` or `sequence_length` is not int32.
5851
5852    Supported Platforms:
5853        ``Ascend`` ``GPU`` ``CPU``
5854
5855    Examples:
5856        >>> import mindspore
5857        >>> import numpy as np
5858        >>> from mindspore import Tensor, ops
5859        >>> x = Tensor(np.array([[[0.3, 0.6, 0.6],
5860        ...                       [0.4, 0.3, 0.9]],
5861        ...
5862        ...                      [[0.9, 0.4, 0.2],
5863        ...                       [0.9, 0.9, 0.1]]]).astype(np.float32))
5864        >>> labels_indices = Tensor(np.array([[0, 0], [1, 0]]), mindspore.int64)
5865        >>> labels_values = Tensor(np.array([2, 2]), mindspore.int32)
5866        >>> sequence_length = Tensor(np.array([2, 2]), mindspore.int32)
5867        >>> ctc_loss = ops.CTCLoss()
5868        >>> loss, gradient = ctc_loss(x, labels_indices, labels_values, sequence_length)
5869        >>> print(loss)
5870        [ 0.79628  0.5995158 ]
5871        >>> print(gradient)
5872        [[[ 0.27029088  0.36485454  -0.6351454  ]
5873          [ 0.28140804  0.25462854  -0.5360366 ]]
5874         [[ 0.47548494  0.2883962    0.04510255 ]
5875          [ 0.4082751   0.4082751    0.02843709 ]]]
5876    """
5877
5878    @prim_attr_register
5879    def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=True,
5880                 ignore_longer_outputs_than_inputs=False):
5881        """Initialize CTCLoss."""
5882        self.init_prim_io_names(inputs=["inputs", "labels_indices", "labels_values", "sequence_length"],
5883                                outputs=["loss", "gradient"])
5884        validator.check_value_type("preprocess_collapse_repeated", preprocess_collapse_repeated, [bool], self.name)
5885        self.preprocess_collapse_repeated_ = preprocess_collapse_repeated
5886        self.ctc_merge_repeated_ = validator.check_value_type("ctc_merge_repeated", ctc_merge_repeated,
5887                                                              [bool], self.name)
5888        validator.check_value_type("ignore_longer_outputs_than_inputs",
5889                                   ignore_longer_outputs_than_inputs, [bool], self.name)
5890        self.ignore_longer_outputs_than_inputs_ = ignore_longer_outputs_than_inputs
5891
5892
5893class CTCGreedyDecoder(Primitive):
5894    r"""
5895    Performs greedy decoding on the logits given in inputs.
5896
5897    Refer to :func:`mindspore.ops.ctc_greedy_decoder` for more details.
5898
5899    Note:
5900        On Ascend, `merge_repeated` cannot be set to ``False`` .
5901
5902    Args:
5903        merge_repeated (bool, optional): If ``True`` , merge repeated classes in output. Default: ``True`` .
5904
5905    Inputs:
5906        - **inputs** (Tensor) - The input Tensor must be a 3-D tensor whose shape is
5907          :math:`(max\_time, batch\_size, num\_classes)`. `num_classes` must be `num_labels + 1` classes,
5908          `num_labels` indicates the number of actual labels. Blank labels are reserved.
5909          Default blank label is `num_classes - 1`. Data type must be float32 or float64.
5910        - **sequence_length** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch\_size, )`.
5911          The type must be int32. Each value in the tensor must be equal to or less than `max_time`.
5912
5913    Outputs:
5914        - **decoded_indices** (Tensor) - A tensor with shape of :math:`(total\_decoded\_outputs, 2)`.
5915          Data type is int64.
5916        - **decoded_values** (Tensor) - A tensor with shape of :math:`(total\_decoded\_outputs, )`,
5917          it stores the decoded classes. Data type is int64.
5918        - **decoded_shape** (Tensor) - A tensor with shape of :math:`(batch\_size, max\_decoded\_length)`.
5919          Data type is int64.
5920        - **log_probability** (Tensor) - A tensor with shape of :math:`(batch\_size, 1)`,
5921          containing sequence log-probability, has the same type as `inputs`.
5922
5923    Supported Platforms:
5924        ``Ascend`` ``GPU`` ``CPU``
5925
5926    Examples:
5927        >>> import mindspore
5928        >>> import numpy as np
5929        >>> from mindspore import Tensor, ops
5930        >>> inputs = Tensor(np.array([[[0.6, 0.4, 0.2], [0.8, 0.6, 0.3]],
5931        ...                           [[0.0, 0.6, 0.0], [0.5, 0.4, 0.5]]]), mindspore.float32)
5932        >>> sequence_length = Tensor(np.array([2, 2]), mindspore.int32)
5933        >>> decoded_indices, decoded_values, decoded_shape, log_probability = ops.CTCGreedyDecoder()(inputs,
5934        ...                                                                                          sequence_length)
5935        >>> print(decoded_indices)
5936        [[0 0]
5937         [0 1]
5938         [1 0]]
5939        >>> print(decoded_values)
5940        [0 1 0]
5941        >>> print(decoded_shape)
5942        [2 2]
5943        >>> print(log_probability)
5944        [[-1.2]
5945         [-1.3]]
5946    """
5947
5948    @prim_attr_register
5949    def __init__(self, merge_repeated=True):
5950        """Initialize CTCGreedyDecoder."""
5951        self.merge_repeated = validator.check_value_type("merge_repeated", merge_repeated, [bool], self.name)
5952
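
# Illustrative sketch (hypothetical, not part of the operator above): greedy CTC
# decoding is an argmax over classes at every time step, followed by merging repeated
# labels (when merge_repeated=True) and removing the blank label (num_classes - 1).
# The NumPy helper below reproduces the decoded labels and log_probability of the
# CTCGreedyDecoder example above; tie-breaking and the packing of the sparse outputs
# may differ in the actual kernels.
def _ctc_greedy_decode_sketch(inputs, sequence_length, merge_repeated=True):
    """Hypothetical NumPy greedy CTC decoding over (max_time, batch_size, num_classes)."""
    import numpy as np
    _, batch_size, num_classes = inputs.shape
    blank = num_classes - 1
    decoded, log_prob = [], []
    for b in range(batch_size):
        length = int(sequence_length[b])
        best = np.argmax(inputs[:length, b, :], axis=-1)           # best class per step
        log_prob.append(-float(np.max(inputs[:length, b, :], axis=-1).sum()))
        labels, prev = [], None
        for cls in best:
            if merge_repeated and cls == prev:
                continue                                           # collapse repeats
            prev = cls
            if cls != blank:
                labels.append(int(cls))                            # drop the blank label
        decoded.append(labels)
    return decoded, log_prob
# For the example above this returns [[0, 1], [0]] and [-1.2, -1.3], in line with the
# printed decoded_values and log_probability.
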
5953
5954class BasicLSTMCell(PrimitiveWithInfer):
5955    """
5956    It's similar to operator :class:`mindspore.ops.DynamicRNN`. BasicLSTMCell will be deprecated in the future.
5957    Please use DynamicRNN instead.
5958
5959    Supported Platforms:
5960        Deprecated
5961    """
5962
5963    @prim_attr_register
5964    def __init__(self, keep_prob=1.0, forget_bias=1.0, state_is_tuple=True, activation='tanh'):
5965        """Initialize BasicLSTMCell."""
5966        self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
5967        self.keep_prob = validator.check_float_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, "keep_prob", self.name)
5968        self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name)
5969        self.state_is_tuple = validator.check_value_type("state_is_tuple", state_is_tuple, [bool], self.name)
5970        self.activation = validator.check_string(activation, ['tanh'], "activation", self.name)
5971
5972    def infer_shape(self, x_shape, h_shape, c_shape, w_shape, b_shape):
5973        validator.check_int(len(x_shape), 2, validator.EQ, "x rank", self.name)
5974        validator.check_int(len(h_shape), 2, validator.EQ, "h rank", self.name)
5975        validator.check_int(len(c_shape), 2, validator.EQ, "c rank", self.name)
5976        validator.check_int(len(w_shape), 2, validator.EQ, "w rank", self.name)
5977        validator.check_int(len(b_shape), 1, validator.EQ, "b rank", self.name)
5978        validator.check("x_shape[0]", x_shape[0], "h_shape[0]", h_shape[0], validator.EQ, self.name)
5979        validator.check("c_shape[0]", c_shape[0], "h_shape[0]", h_shape[0], validator.EQ, self.name)
5980        validator.check("c_shape[1]", c_shape[1], "h_shape[1]", h_shape[1], validator.EQ, self.name)
5981        validator.check("w_shape[1]", w_shape[1], "4*h_shape[1]", 4 * h_shape[1], validator.EQ, self.name)
5982        validator.check("w_shape[0]", w_shape[0], "x_shape[1]+h_shape[1]", x_shape[1] + h_shape[1],
5983                        validator.EQ, self.name)
5984        validator.check("b_shape[0]", b_shape[0], "4*h_shape[1]", 4 * h_shape[1], validator.EQ, self.name)
5985        ct_shape = c_shape
5986        ht_shape = c_shape
5987        it_shape = c_shape
5988        jt_shape = c_shape
5989        ft_shape = c_shape
5990        ot_shape = c_shape
5991        tanhct_shape = c_shape
5992
5993        return ct_shape, ht_shape, it_shape, jt_shape, ft_shape, ot_shape, tanhct_shape
5994
5995    def infer_dtype(self, x_dtype, h_dtype, c_dtype, w_dtype, b_dtype):
5996        tuple(map(partial(validator.check_tensor_dtype_valid,
5997                          valid_dtypes=(mstype.float16, mstype.float32), prim_name=self.name),
5998                  ("x_dtype", "h_dtype", "w_dtype"),
5999                  (x_dtype, h_dtype, w_dtype)))
6000        args = {"c_dtype": c_dtype, "b_dtype": b_dtype}
6001        validator.check_tensors_dtypes_same_and_valid(args, [mstype.float16, mstype.float32], self.name)
6002        return c_dtype, mstype.float16, c_dtype, c_dtype, c_dtype, c_dtype, c_dtype
6003
6004
6005class DynamicRNN(Primitive):
6006    r"""
6007    Applies a recurrent neural network to the input.
6008    Only long short-term memory (LSTM) is supported currently.
6009
6010    .. math::
6011        \begin{array}{ll} \\
6012            i_{t+1} = \sigma(W_{ix} x_{t+1} + b_{ix} + W_{ih} h_{(t)} + b_{ih}) \\
6013            f_{t+1} = \sigma(W_{fx} x_{t+1} + b_{fx} + W_{fh} h_{(t)} + b_{fh}) \\
6014            \tilde{c}_{t+1} = \tanh(W_{cx} x_{t+1} + b_{cx} + W_{ch} h_{(t)} + b_{ch}) \\
6015            o_{t+1} = \sigma(W_{ox} x_{t+1} + b_{ox} + W_{oh} h_{(t)} + b_{oh}) \\
6016            c_{t+1} = f_{t+1} * c_{(t)} + i_{t+1} * \tilde{c}_{t+1} \\
6017            h_{t+1} = o_{t+1} * \tanh(c_{t+1}) \\
6018        \end{array}
6019
6020    :math:`h_{t+1}` is the hidden state at time `t+1`. :math:`x_{t+1}` is the input
6021    at time `t+1`. :math:`h_{t}` is the hidden state of the layer
6022    at time `t` or the initial hidden state at time `0`.
6023    :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. :math:`W, b`
6024    are learnable weights between the output and the input in the formula. For instance,
6025    :math:`W_{ix}, b_{ix}` are the weight and bias used to transform from input :math:`x` to :math:`i`.
6026
6027    Args:
6028        cell_type (str, optional): A string identifying the cell type in the operator. Default: ``'LSTM'`` .
6029            Only 'LSTM' is currently supported.
6030        direction (str, optional): A string identifying the direction in the operator. Default: ``'UNIDIRECTIONAL'`` .
6031            Only 'UNIDIRECTIONAL' is currently supported.
6032        cell_depth (int, optional): An integer identifying the cell depth in the operator. Default: ``1`` .
6033        use_peephole (bool, optional): A bool identifying if use peephole in the operator. Default: ``False`` .
6034        keep_prob (float, optional): A float identifying the keep prob in the operator. Default: ``1.0`` .
6035        cell_clip (float, optional): A float identifying the cell clip in the operator. Default: ``-1.0`` .
6036        num_proj (int, optional): An integer identifying the number projection in the operator. Default: ``0`` .
6037        time_major (bool, optional): A bool that specifies the data format of `x`. If set to ``True`` , the format is
6038            :math:`(num\_step, batch\_size, input\_size)`; if set to ``False`` , the format is
6039            :math:`(batch\_size, num\_step, input\_size)`.
6040            Default: ``True`` . Only supports ``True`` at present.
6041        activation (str, optional): A string identifying the type of activation function in the operator.
6042            Default: ``'tanh'`` . Only 'tanh' is currently supported.
6043        forget_bias (float, optional): A float identifying the forget bias in the operator. Default: ``0.0`` .
6044        is_training (bool, optional): A bool identifying whether the operator is training. Default: ``True`` .
6045
6046    Inputs:
6047        - **x** (Tensor) - Current words. Tensor of shape :math:`(num\_step, batch\_size, input\_size)`.
6048          The data type must be float16.
6049        - **w** (Tensor) - Weight. Tensor of shape :math:`(input\_size + hidden\_size, 4 * hidden\_size)`.
6050          The data type must be float16.
6051        - **b** (Tensor) - Bias. Tensor of shape :math:`(4 * hidden\_size)`.
6052          The data type must be float16.
6053        - **seq_length** (Tensor) - The length of each batch. Tensor of shape :math:`(batch\_size, )`.
6054          Only `None` is currently supported.
6055        - **init_h** (Tensor) - Hidden state of initial time. Tensor of shape :math:`(1, batch\_size, hidden\_size)`.
6056          The data type must be float16.
6057        - **init_c** (Tensor) - Cell state of initial time. Tensor of shape :math:`(1, batch\_size, hidden\_size)`.
6058          The data type must be float16.
6059
6060    Outputs:
6061        - **y** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`.
6062          Has the same type as input `b`.
6063        - **output_h** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`.
6064          With data type of float16.
6065        - **output_c** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`.
6066          Has the same type as input `b`.
6067        - **i** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`.
6068          Has the same type as input `b`.
6069        - **j** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`.
6070          Has the same type as input `b`.
6071        - **f** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`.
6072          Has the same type as input `b`.
6073        - **o** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`.
6074          Has the same type as input `b`.
6075        - **tanhct** (Tensor) - A Tensor of shape :math:`(num\_step, batch\_size, hidden\_size)`.
6076          Has the same type as input `b`.
6077
6078    Raises:
6079        TypeError: If `cell_type`, `direction` or `activation` is not a str.
6080        TypeError: If `cell_depth` or `num_proj` is not an int.
6081        TypeError: If `keep_prob`, `cell_clip` or `forget_bias` is not a float.
6082        TypeError: If `use_peephole`, `time_major` or `is_training` is not a bool.
6083        TypeError: If `x`, `w`, `b`, `seq_length`, `init_h` or `init_c` is not a Tensor.
6084        TypeError: If dtype of `x`, `w`, `init_h` or `init_c` is not float16.
6085        TypeError: If dtype of `b` is neither float16 nor float32.
6086
6087    Supported Platforms:
6088        ``Ascend``
6089
6090    Examples:
6091        >>> import numpy as np
6092        >>> from mindspore import Tensor, ops
6093        >>> x = Tensor(np.random.rand(2, 16, 64).astype(np.float16))
6094        >>> w = Tensor(np.random.rand(96, 128).astype(np.float16))
6095        >>> b = Tensor(np.random.rand(128).astype(np.float16))
6096        >>> init_h = Tensor(np.random.rand(1, 16, 32).astype(np.float16))
6097        >>> init_c = Tensor(np.random.rand(1, 16, 32).astype(np.float16))
6098        >>> dynamic_rnn = ops.DynamicRNN()
6099        >>> output = dynamic_rnn(x, w, b, None, init_h, init_c)
6100        >>> print(output[0].shape)
6101        (2, 16, 32)
6102    """
6103
6104    @prim_attr_register
6105    def __init__(self,
6106                 cell_type='LSTM',
6107                 direction='UNIDIRECTIONAL',
6108                 cell_depth=1,
6109                 use_peephole=False,
6110                 keep_prob=1.0,
6111                 cell_clip=-1.0,
6112                 num_proj=0,
6113                 time_major=True,
6114                 activation='tanh',
6115                 forget_bias=0.0,
6116                 is_training=True):
6117        """Initialize DynamicRNN."""
6118        self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name)
6119        self.cell_depth = validator.check_value_type("cell_depth", cell_depth, [int], self.name)
6120        self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
6121        validator.check_number_range(keep_prob, 0.0, 1.0, validator.INC_BOTH, float, "keep_prob")
6122        self.cell_clip = validator.check_value_type("cell_clip", cell_clip, [float], self.name)
6123        self.num_proj = validator.check_non_negative_int(num_proj, "num_proj", self.name)
6124        self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name)
6125        self.use_peephole = validator.check_value_type("use_peephole", use_peephole, [bool], self.name)
6126        self.time_major = validator.check_value_type("time_major", time_major, [bool], self.name)
6127        validator.check("time_major", time_major, "the supported value", True, validator.EQ, self.name)
6128        self.is_training = validator.check_value_type("is_training", is_training, [bool], self.name)
6129        validator.check_value_type("cell_type", cell_type, [str], self.name)
6130        self.cell_type = validator.check_string(cell_type, ['LSTM'], "cell_type", self.name)
6131        validator.check_value_type("direction", direction, [str], self.name)
6132        self.direction = validator.check_string(direction, ['UNIDIRECTIONAL'], "direction", self.name)
6133        validator.check_value_type("activation", activation, [str], self.name)
6134        self.activation = validator.check_string(activation, ['tanh'], "activation", self.name)
6135
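
# Illustrative sketch (hypothetical, not part of the operator above): one LSTM step
# written directly from the equations in the DynamicRNN docstring. It only illustrates
# the math; it does not model the packed (input_size + hidden_size, 4 * hidden_size)
# weight layout, the gate ordering or the float16 requirements of the kernel.
def _lstm_step_sketch(x, h, c, w_i, w_f, w_c, w_o, b_i, b_f, b_c, b_o):
    """Hypothetical one-step LSTM cell following the documented equations."""
    import numpy as np

    def _sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))

    xh = np.concatenate([x, h], axis=-1)   # stacked [x_{t+1}, h_t], so each W holds W_*x and W_*h
    i = _sigmoid(xh @ w_i + b_i)           # input gate
    f = _sigmoid(xh @ w_f + b_f)           # forget gate
    c_tilde = np.tanh(xh @ w_c + b_c)      # candidate cell state
    o = _sigmoid(xh @ w_o + b_o)           # output gate
    c_new = f * c + i * c_tilde
    h_new = o * np.tanh(c_new)
    return h_new, c_new, i, c_tilde, f, o
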
6136
6137class DynamicGRUV2(Primitive):
6138    r"""
6139    Applies a single-layer gated recurrent unit (GRU) to an input sequence.
6140
6141    .. math::
6142
6143        \begin{array}{ll}
6144            r_{t+1} = \sigma(W_{ir} x_{t+1} + b_{ir} + W_{hr} h_{(t)} + b_{hr}) \\
6145            z_{t+1} = \sigma(W_{iz} x_{t+1} + b_{iz} + W_{hz} h_{(t)} + b_{hz}) \\
6146            n_{t+1} = \tanh(W_{in} x_{t+1} + b_{in} + r_{t+1} * (W_{hn} h_{(t)}+ b_{hn})) \\
6147            h_{t+1} = (1 - z_{t+1}) * n_{t+1} + z_{t+1} * h_{(t)}
6148        \end{array}
6149
6150    where :math:`h_{t+1}` is the hidden state at time `t+1`, :math:`x_{t+1}` is the input
6151    at time `t+1`, :math:`h_{t}` is the hidden state of the layer
6152    at time `t` or the initial hidden state at time `0`. :math:`r_{t+1}`,
6153    :math:`z_{t+1}`, :math:`n_{t+1}` are the reset, update, and new gates, respectively.
6154    :math:`W`, :math:`b` are the weight parameter and the deviation parameter respectively.
6155    :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product.
6156
6157    Args:
6158        direction (str, optional): A string identifying the direction in the operator. Default: ``'UNIDIRECTIONAL'`` .
6159            Only ``'UNIDIRECTIONAL'`` is currently supported.
6160        cell_depth (int, optional): An integer identifying the cell depth in the operator. Default: ``1`` .
6161        keep_prob (float, optional): A float identifying the keep prob in the operator. Default: ``1.0`` .
6162        cell_clip (float, optional): A float identifying the cell clip in the operator. Default: ``-1.0`` .
6163        num_proj (int, optional): An integer identifying the number projection in the operator. Default: ``0`` .
6164        time_major (bool, optional): A bool identifying the time major in the operator. Default: ``True`` .
6165        activation (str, optional): A string identifying the type of activation function in the operator.
6166            Default: ``'tanh'`` . Only ``'tanh'`` is currently supported.
6167        gate_order (str, optional): A string identifying the gate order in weight and bias. Default: ``'rzh'`` .
6168            ``'zrh'`` is another option. Here, ``'rzh'`` means the gate order is: reset gate, update gate, hidden gate.
6169            ``'zrh'`` means the gate order is: update gate, reset gate, hidden gate.
6170        reset_after (bool, optional): A bool identifying whether to apply reset gate after matrix multiplication.
6171            Default: ``True`` .
6172        is_training (bool, optional): A bool identifying whether the operator is training. Default: ``True`` .
6173
6174    Inputs:
6175        - **x** (Tensor) - Current words.
6176          Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{input_size})`.
6177          The data type must be float16.
6178        - **weight_input** (Tensor) - Input-hidden weight :math:`W_{\{ir,iz,in\}}`.
6179          Tensor of shape :math:`(\text{input_size}, 3 \times \text{hidden_size})`.
6180          The data type must be float16.
6181        - **weight_hidden** (Tensor) - Hidden-hidden weight :math:`W_{\{hr,hz,hn\}}`.
6182          Tensor of shape :math:`(\text{hidden_size}, 3 \times \text{hidden_size})`.
6183          The data type must be float16.
6184        - **bias_input** (Tensor) - Input-hidden bias :math:`b_{\{ir,iz,in\}}`.
6185          Tensor of shape :math:`(3 \times \text{hidden_size})`, or None.
6186          Has the same data type as input `init_h`.
6187        - **bias_hidden** (Tensor) - Hidden-hidden bias :math:`b_{\{hr,hz,hn\}}`.
6188          Tensor of shape :math:`(3 \times \text{hidden_size})`,
6189          or None. Has the same data type as input `init_h`.
6190        - **seq_length** (Tensor) - The length of each batch. Tensor of shape :math:`(\text{batch_size})`.
6191          Only `None` is currently supported.
6192        - **init_h** (Tensor) - Hidden state of initial time.
6193          Tensor of shape :math:`(\text{batch_size}, \text{hidden_size})`.
6194          The data type must be float16 or float32.
6195
6196    Outputs:
6197        - **y** (Tensor) - A Tensor of shape:
6198
6199          - If `num_proj` > 0, y_shape is :math:`(num\_step, batch\_size, min(hidden\_size, num\_proj))`.
6200          - If `num_proj` = 0, y_shape is :math:`(num\_step, batch\_size, hidden\_size)`.
6201
6202          Has the same data type as `bias_type` (see the note below).
6203        - **output_h** (Tensor) - A Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{hidden_size})`.
6204          Has the same data type as `bias_type`.
6205        - **update** (Tensor) - A Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{hidden_size})`.
6206          Has the same data type as `bias_type`.
6207        - **reset** (Tensor) - A Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{hidden_size})`.
6208          Has the same data type as `bias_type`.
6209        - **new** (Tensor) - A Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{hidden_size})`.
6210          Has the same data type as `bias_type`.
6211        - **hidden_new** (Tensor) - A Tensor of shape :math:`(\text{num_step}, \text{batch_size}, \text{hidden_size})`.
6212          Has the same data type as `bias_type`.
6213
6214        A note about the bias_type:
6215
6216        - If `bias_input` and `bias_hidden` both are `None`, `bias_type` is the data type of `init_h`.
6217        - If `bias_input` is not `None`, `bias_type` is the data type of `bias_input`.
6218        - If `bias_input` is `None` and `bias_hidden` is not `None`, `bias_type` is the data type of `bias_hidden`.
6219
6220    Raises:
6221        TypeError: If `direction`, `activation` or `gate_order` is not a str.
6222        TypeError: If `cell_depth` or `num_proj` is not an int.
6223        TypeError: If `keep_prob` or `cell_clip` is not a float.
6224        TypeError: If `time_major`, `reset_after` or `is_training` is not a bool.
6225        TypeError: If `x`, `weight_input`, `weight_hidden`, `bias_input`, `bias_hidden`, `seq_length` or `init_h` is not
6226                   a Tensor.
6227        TypeError: If dtype of `x`, `weight_input` or `weight_hidden` is not float16.
6228        TypeError: If dtype of `init_h` is neither float16 nor float32.
6229
6230    Supported Platforms:
6231        ``Ascend``
6232
6233    Examples:
6234        >>> import numpy as np
6235        >>> from mindspore import Tensor, ops
6236        >>> x = Tensor(np.random.rand(2, 8, 64).astype(np.float16))
6237        >>> weight_i = Tensor(np.random.rand(64, 48).astype(np.float16))
6238        >>> weight_h = Tensor(np.random.rand(16, 48).astype(np.float16))
6239        >>> bias_i = Tensor(np.random.rand(48).astype(np.float16))
6240        >>> bias_h = Tensor(np.random.rand(48).astype(np.float16))
6241        >>> init_h = Tensor(np.random.rand(8, 16).astype(np.float16))
6242        >>> dynamic_gru_v2 = ops.DynamicGRUV2()
6243        >>> output = dynamic_gru_v2(x, weight_i, weight_h, bias_i, bias_h, None, init_h)
6244        >>> print(output[0].shape)
6245        (2, 8, 16)
6246    """
6247
6248    @prim_attr_register
6249    def __init__(self,
6250                 direction='UNIDIRECTIONAL',
6251                 cell_depth=1,
6252                 keep_prob=1.0,
6253                 cell_clip=-1.0,
6254                 num_proj=0,
6255                 time_major=True,
6256                 activation="tanh",
6257                 gate_order="rzh",
6258                 reset_after=True,
6259                 is_training=True):
6260        """Initialize DynamicGRUV2."""
6261        self.cell_depth = validator.check_value_type("cell_depth", cell_depth, [int], self.name)
6262        self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
6263        self.cell_clip = validator.check_value_type("cell_clip", cell_clip, [float], self.name)
6264        self.num_proj = validator.check_non_negative_int(num_proj, "num_proj", self.name)
6265        self.time_major = validator.check_value_type("time_major", time_major, [bool], self.name)
6266        self.is_training = validator.check_value_type("is_training", is_training, [bool], self.name)
6267        self.direction = validator.check_string(direction, ['UNIDIRECTIONAL'], "direction", self.name)
6268        self.activation = validator.check_string(activation, ['tanh'], "activation", self.name)
6269        self.gate_order = validator.check_string(gate_order, ['zrh', 'rzh'], "gate_order", self.name)
6270        self.reset_after = validator.check_value_type("reset_after", reset_after, [bool], self.name)
6271        self.init_prim_io_names(
6272            inputs=[
6273                "x", "weight_input", "weight_hidden", "bias_input",
6274                "bias_hidden", "seq_length", "init_h"
6275            ],
6276            outputs=["y", "output_h", "update", "reset", "new", "hidden_new"])
6277
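
# Illustrative sketch (hypothetical, not part of the operator above): one GRU step
# written from the DynamicGRUV2 equations (the reset_after=True form, where the reset
# gate multiplies W_{hn} h + b_{hn}). It ignores the packed 3 * hidden_size weight
# layout, the gate_order attribute and the float16 requirements of the kernel.
def _gru_step_sketch(x, h, w_ir, w_iz, w_in, w_hr, w_hz, w_hn,
                     b_ir, b_iz, b_in, b_hr, b_hz, b_hn):
    """Hypothetical one-step GRU cell following the documented equations."""
    import numpy as np

    def _sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))

    r = _sigmoid(x @ w_ir + b_ir + h @ w_hr + b_hr)        # reset gate
    z = _sigmoid(x @ w_iz + b_iz + h @ w_hz + b_hz)        # update gate
    n = np.tanh(x @ w_in + b_in + r * (h @ w_hn + b_hn))   # new gate
    h_new = (1.0 - z) * n + z * h                          # next hidden state
    return h_new, z, r, n
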
6278
6279class InTopK(Primitive):
6280    r"""
6281    Determines whether the targets are in the top `k` predictions.
6282
6283    Refer to :func:`mindspore.ops.intopk` for more details.
6284
6285    Args:
6286        k (int): Specifies the number of top elements to be used for computing precision along the last dimension.
6287
6288    Inputs:
6289        - **x1** (Tensor) - A 2D Tensor defines the predictions of a batch of samples with float16 or float32
6290          data type.
6291        - **x2** (Tensor) - A 1D Tensor defines the labels of a batch of samples with int32 data type. The size of `x2`
6292          must be equal to the first dimension of `x1`. The values of `x2` cannot be negative and
6293          must not exceed the largest valid index of the second dimension of `x1`.
6294
6295    Outputs:
6296        A 1-D Tensor of type bool with the same shape as `x2`. For sample `i` in `x2`,
6297        if its label is among the top `k` predictions of the corresponding row of `x1`, the value is ``True`` ,
6298        otherwise ``False`` .
6299
6300    Supported Platforms:
6301        ``Ascend`` ``GPU`` ``CPU``
6302
6303    Examples:
6304        >>> import mindspore
6305        >>> import numpy as np
6306        >>> from mindspore import Tensor, ops
6307        >>> x1 = Tensor(np.array([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]]), mindspore.float32)
6308        >>> x2 = Tensor(np.array([1, 3]), mindspore.int32)
6309        >>> in_top_k = ops.InTopK(3)
6310        >>> output = in_top_k(x1, x2)
6311        >>> print(output)
6312        [ True  False]
6313    """
6314
6315    @prim_attr_register
6316    def __init__(self, k):
6317        """Initialize InTopK"""
6318        self.init_prim_io_names(inputs=['x1', 'x2', 'k'], outputs=['y'])
6319        validator.check_value_type("k", k, [int], self.name)
6320
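
# Illustrative sketch (hypothetical, not part of the operator above): InTopK answers,
# per sample, whether the target index is among the k largest predictions. A NumPy
# equivalent under that reading (tie handling may differ from the kernels):
def _in_top_k_sketch(x1, x2, k):
    """Hypothetical NumPy check of the InTopK semantics."""
    import numpy as np
    topk = np.argsort(-x1, axis=-1)[:, :k]                 # indices of the k largest entries
    return np.array([int(x2[i]) in topk[i] for i in range(x1.shape[0])])
# For the example above ([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]] with labels [1, 3] and
# k=3) this also yields [True, False].
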
6321
6322class LRN(Primitive):
6323    r"""
6324    Local Response Normalization.
6325
6326    .. warning::
6327        LRN is deprecated on Ascend due to a potential accuracy problem. It is recommended to use other
6328        normalization methods, e.g. :class:`mindspore.ops.BatchNorm`.
6329
6330    .. math::
6331
6332        b_{c} = a_{c}\left(k + \frac{\alpha}{n}
6333        \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}
6334
6335    where :math:`a_{c}` indicates the value of the pixel at channel :math:`c` in the feature map,
6336    :math:`n/2` indicates the `depth_radius`, :math:`k` indicates the `bias`,
6337    :math:`\alpha` indicates the `alpha`, and :math:`\beta` indicates the `beta`.
6338
6339    Args:
6340        depth_radius (int): Half-width of the 1-D normalization window (a scalar). Default: ``5`` .
6341        bias (float): An offset (usually positive to avoid dividing by 0). Default: ``1.0`` .
6342        alpha (float): A scale factor, usually positive. Default: ``1.0`` .
6343        beta (float): An exponent. Default: ``0.5`` .
6344        norm_region (str): Specifies normalization region. Options: ``"ACROSS_CHANNELS"`` .
6345            Default: ``"ACROSS_CHANNELS"`` .
6346
6347    Inputs:
6348        - **x** (Tensor) - A 4-D Tensor with float16 or float32 data type.
6349
6350    Outputs:
6351        Tensor, with the same shape and data type as `x`.
6352
6353    Raises:
6354        TypeError: If `depth_radius` is not an int.
6355        TypeError: If `bias`, `alpha` or `beta` is not a float.
6356        TypeError: If `norm_region` is not a str.
6357        TypeError: If `x` is not a Tensor.
6358
6359    Supported Platforms:
6360        ``GPU`` ``CPU``
6361
6362    Examples:
6363        >>> import mindspore
6364        >>> import numpy as np
6365        >>> from mindspore import Tensor, ops
6366        >>> x = Tensor(np.array([[[[0.1], [0.2]],
6367        ...                       [[0.3], [0.4]]]]), mindspore.float32)
6368        >>> lrn = ops.LRN()
6369        >>> output = lrn(x)
6370        >>> print(output)
6371        [[[[0.09534626]
6372           [0.1825742 ]]
6373          [[0.2860388 ]
6374           [0.3651484 ]]]]
6375    """
6376
6377    @prim_attr_register
6378    def __init__(self, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CHANNELS"):
6379        """Initialize LRN"""
6380        super().__init__("LRN")
6381        self.init_prim_io_names(inputs=['x'], outputs=['y'])
6382        validator.check_value_type("depth_radius", depth_radius, [int], self.name)
6383        validator.check_value_type("bias", bias, [float], self.name)
6384        validator.check_value_type("alpha", alpha, [float], self.name)
6385        validator.check_value_type("beta", beta, [float], self.name)
6386        validator.check_value_type("norm_region", norm_region, [str], self.name)
6387        validator.check_string(norm_region, ['ACROSS_CHANNELS'], 'norm_region', self.name)
6388        validator.check_non_negative_int(depth_radius, "depth_radius", self.name)
6389
6390
6391class AvgPool3D(Primitive):
6392    r"""
6393    3D Average pooling operation.
6394
6395    Typically, the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`, and AvgPool3D outputs
6396    regional averages in the :math:`(D_{in}, H_{in}, W_{in})` dimensions. Given kernel size
6397    :math:`ks = (d_{ker}, h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1, s_2)`, the operation is as follows.
6398
6399    .. warning::
6400        "kernel_size" is in the range [1, 255]. "strides" is in the range [1, 63].
6401
6402    .. math::
6403        \text{output}(N_i, C_j, d, h, w) =
6404        \frac{1}{d_{ker} * h_{ker} * w_{ker}} \sum_{l=0}^{d_{ker}-1} \sum_{m=0}^{h_{ker}-1} \sum_{n=0}^{w_{ker}-1}
6405        \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)
6406
6407    Args:
6408        kernel_size (Union[int, tuple[int]]): The size of the kernel used to take the average value,
6409            either an int number that applies to the depth, height and width at the same time, or a tuple
6410            of three int numbers that represent depth, height and width respectively. Default: ``1`` .
6411        strides (Union[int, tuple[int]]): The distance of kernel moving, either an int number that applies
6412            to the depth, height and width of movement at the same time, or a tuple of three int numbers that
6413            represent depth, height and width of movement respectively. Default: ``1`` .
6414        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
6415            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
6416
6417            - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
6418              are the same when `strides` is set to ``1``.
6419              The amount of padding is calculated by the operator internally. If the amount is even,
6420              it is uniformly distributed around the input; if it is odd, the excess amount goes
6421              to the front/right/bottom side.
6422              If this mode is set, `pad` must be 0.
6423            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
6424              possible depth, height and width. Extra pixels that could not complete a full stride will
6425              be discarded. If this mode is set, `pad` must be 0.
6426            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
6427              in the depth, height and width dimension is determined by the `pad` parameter.
6428              If this mode is set, `pad` must be greater than or equal to 0.
6429
6430        pad (Union(int, tuple[int], list[int])): The pad value to be filled. Default: ``0`` . If `pad` is an integer,
6431            the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
6432            If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
6433            pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
6434        ceil_mode (bool): If ``True`` , ceil instead of floor to compute the output shape. Default: ``False`` .
6435        count_include_pad (bool): If ``True`` , averaging calculation will include the zero-padding.
6436            Default: ``True`` .
6437        divisor_override (int): If specified, it will be used as divisor in the averaging calculation,
6438            otherwise kernel_size will be used. Default: ``0`` .
6439        data_format (str): The optional value for data format. Currently only ``'NCDHW'`` is supported.
6440            Default: ``'NCDHW'`` .
6441
6442    Inputs:
6443        - **x** (Tensor) - Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`.
6444          Currently supports float16, float32 and float64 data types.
6445
6446    Outputs:
6447        Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the same data type with `x`.
6448
6449    Raises:
6450        TypeError: If `kernel_size`, `strides` or `pad` is neither an int nor a tuple.
6451        TypeError: If `ceil_mode` or `count_include_pad` is not a bool.
6452        TypeError: If `pad_mode` or `data_format` is not a string.
6453        TypeError: If `divisor_override` is not an int.
6454        ValueError: If numbers in `kernel_size` or `strides` are not positive.
6455        ValueError: If `kernel_size` or `strides` is a tuple whose length is not equal to 3.
6456        ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
6457        ValueError: If `pad` is a tuple whose length is not equal to 6.
6458        ValueError: If element of `pad` is less than 0.
6459        ValueError: If `pad_mode` is not equal to 'pad' and `pad` is not equal to 0 or (0, 0, 0, 0, 0, 0).
6460        ValueError: If `data_format` is not 'NCDHW'.
6461
6462    Supported Platforms:
6463        ``Ascend`` ``GPU`` ``CPU``
6464
6465    Examples:
6466        >>> import mindspore
6467        >>> from mindspore import Tensor, ops
6468        >>> import numpy as np
6469        >>> x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float16)
6470        >>> avg_pool3d = ops.AvgPool3D(kernel_size=2, strides=1, pad_mode="valid")
6471        >>> output = avg_pool3d(x)
6472        >>> print(output)
6473        [[[[[ 5.  6.]]]
6474          [[[17. 18.]]]]]
6475    """
6476
6477    @prim_attr_register
6478    def __init__(self, kernel_size=1, strides=1, pad_mode="valid", pad=0, ceil_mode=False,
6479                 count_include_pad=True, divisor_override=0, data_format="NCDHW"):
6480        """Initialize AvgPool3D"""
6481        self.init_prim_io_names(inputs=['input'], outputs=['output'])
6482        self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name, ret_five=True)
6483        self.add_prim_attr('kernel_size', self.kernel_size)
6484        self.strides = _check_3d_int_or_tuple('strides', strides, self.name, ret_five=True)
6485        self.add_prim_attr('strides', self.strides)
6486        validator.check_value_type('pad', pad, (int, tuple, list), self.name)
6487        if isinstance(pad, int):
6488            pad = (pad,) * 6
6489        if len(pad) != 6:
6490            raise ValueError(f"For '{self.name}', attr 'pad' must be a positive int number or a tuple of "
6491                             f"six positive int numbers, but got {self.pad}.")
6492        self.pad_list = pad
6493        self.add_prim_attr('pad_list', self.pad_list)
6494        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
6495        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME', 'PAD'], 'pad_mode', self.name)
6496        self.add_prim_attr('pad_mode', self.pad_mode)
6497
6498        if self.pad_mode != 'PAD' and pad != (0, 0, 0, 0, 0, 0):
6499            raise ValueError(f"For '{self.name}', the 'pad' must be zero or (0, 0, 0, 0, 0, 0) when 'pad_mode' "
6500                             f"is not \"PAD\", but got 'pad' is {self.pad} and 'pad_mode' is {pad_mode}.")
6501        if self.pad_mode == 'PAD':
6502            for item in pad:
6503                validator.check_non_negative_int(item, 'pad or item of pad', self.name)
6504        self.ceil_mode = validator.check_value_type('ceil_mode', ceil_mode, bool, self.name)
6505        self.count_include_pad = validator.check_value_type('count_include_pad', count_include_pad, bool, self.name)
6506        self.divisor_override = validator.check_non_negative_int(divisor_override, 'divisor_override', self.name)
6507        self.format = validator.check_string(data_format, ['NCDHW'], 'format', self.name)
6508
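
# --- Illustrative sketch (editor's addition, not part of the operator) -----------------
# A hypothetical, framework-free helper showing the usual "valid" pooling arithmetic
# floor((in - kernel) / stride) + 1 per spatial dimension, which is consistent with the
# AvgPool3D docstring example above. The real shape inference is done by the backend.
def _avg_pool3d_valid_out_shape_sketch(in_shape, kernel_size, strides):
    """Output shape (N, C, D, H, W) of AvgPool3D with pad_mode='valid' (illustration only)."""
    n, c, d, h, w = in_shape
    k_d, k_h, k_w = kernel_size
    s_d, s_h, s_w = strides
    return (n, c, (d - k_d) // s_d + 1, (h - k_h) // s_h + 1, (w - k_w) // s_w + 1)

# Example (matches the docstring): an input of shape (1, 2, 2, 2, 3) with kernel_size=2 and
# strides=1 gives (1, 2, 1, 1, 2), i.e. the two averaged values printed in the example.
# _avg_pool3d_valid_out_shape_sketch((1, 2, 2, 2, 3), (2, 2, 2), (1, 1, 1)) -> (1, 2, 1, 1, 2)
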
6509
6510class Conv3D(Primitive):
6511    r"""
6512    3D convolution layer.
6513
6514    Applies a 3D convolution over an input tensor which is typically of shape
6515    :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`,
6516    where :math:`N` is batch size, :math:`C` is channel number,
6517    :math:`D, H, W`
6518    are the depth, height and width of the feature map, respectively.
6519
6520    The output is calculated based on formula:
6521
6522    .. math::
6523
6524        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
6525        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
6526
6527    where :math:`bias` is the output channel bias, :math:`ccor` is
6528    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
6529    :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
6530
6531    Here are the indices' meanings:
6532
6533    - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
6534      where :math:`N` is the batch size of the input.
6535
6536    - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
6537      where :math:`C_{out}` is the number of
6538      output channels, which is also equal to the number of kernels.
6539
6540    - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
6541      where :math:`C_{in}` is the number of
6542      input channels, which is also equal to the number of channels in the convolutional kernels.
6543
6544    Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th
6545    output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional
6546    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
6547    channel in the :math:`i`-th batch of the input feature map.
6548
6549    The shape of the convolutional kernel is given by
6550    :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`
6551    where :math:`\text{kernel_size[0]}` ,
6552    :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are the depth,
6553    height and width of the kernel, respectively.
6554    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
6555    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
6556    \text{kernel_size[1]}, \text{kernel_size[2]})`,
6557    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
6558
6559    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
6560    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
6561
6562    Note:
6563        1. On Ascend platform, :math:`groups=1` must be satisfied.
6564        2. On Ascend :math:`dilation` on depth only supports the case of 1.
6565
6566    Args:
6567        out_channel (int): Specifies output channel :math:`C_{out}`.
6568        kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel.
6569            It can be a single int or a tuple of 3 integers. A single int means the value is for depth, height
6570            and the width. A tuple of 3 ints means the first value is for depth and
6571            the rest is for the height and width.
6572        mode (int, optional): Modes for different convolutions. It is currently not used. Default: ``1`` .
6573        stride (Union[int, tuple[int]], optional): The distance of kernel moving, it can be an int number
6574            that represents the depth, height and width of movement or a tuple of three int numbers that
6575            represent depth, height and width movement respectively. Default: ``1`` .
6576        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
6577            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
6578
6579            - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
6580              are the same when `stride` is set to ``1``.
6581              The amount of padding is calculated by the operator internally. If the amount is even,
6582              it is uniformly distributed around the input; if it is odd, the excess amount goes
6583              to the front/right/bottom side.
6584              If this mode is set, `pad` must be 0.
6585            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
6586              possible depth, height and width. Extra pixels that could not complete a full stride will
6587              be discarded. If this mode is set, `pad` must be 0.
6588            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
6589              in the depth, height and width dimension is determined by the `pad` parameter.
6590              If this mode is set, `pad` must be greater than or equal to 0.
6591
6592        pad (Union(int, tuple[int]), optional): Specifies the amount of padding to apply on input
6593            when `pad_mode` is set to ``"pad"``. It can be a single int or a tuple of 6 ints.
6594            If `pad` is one integer, the paddings of head, tail, top, bottom,
6595            left and right are the same, equal to `pad`. If `pad` is a tuple with 6 integers, the
6596            paddings of head, tail, top, bottom, left and right is equal to pad[0],
6597            pad[1], pad[2], pad[3], pad[4] and pad[5] accordingly. Default: ``0`` .
6598        dilation (Union[int, tuple[int]], optional): Specifies the dilation rate to use for dilated convolution.
6599            It can be a single int or a tuple of 3 integers. A single int means the dilation size is the same
6600            in the depth, height and width directions. A tuple of 3 ints represents the dilation size in
6601            the depth, height and width directions, respectively.
6602            Assuming :math:`dilation=(d0, d1, d2)`, the convolutional kernel samples the input with a
6603            spacing of :math:`d0-1` elements in the depth direction,
6604            :math:`d1-1` elements in the height direction, :math:`d2-1` elements in the
6605            width direction respectively. The values in the depth, height and width dimensions are in the
6606            ranges [1, D], [1, H] and [1, W], respectively.
6607            Default: ``1`` .
6608        group (int, optional): The number of groups into which the filter is divided. `in_channels`
6609            and `out_channels` must be divisible by `group`. Default: ``1`` .
6610        data_format (str, optional): The data format. Currently only ``"NCDHW"`` is supported. Default: ``"NCDHW"`` .
6611
6612    Inputs:
6613        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
6614          Currently input data type only supports float16 and float32.
6615        - **weight** (Tensor) - If the kernel size is :math:`(k_d, K_h, K_w)`, then the shape is
6616          :math:`(C_{out}, C_{in}/groups, k_d, K_h, K_w)`.
6617          Currently weight data type only supports float16 and float32.
6618        - **bias** (Tensor) - Tensor of shape :math:`(C_{out})`. When bias is None, zeros will be used.
6619          Default: ``None`` .
6620
6621    Outputs:
6622        Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
6623
6624        `pad_mode` is ``"same"``:
6625
6626        .. math::
6627            \begin{array}{ll} \\
6628                D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
6629                H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
6630                W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
6631            \end{array}
6632
6633        `pad_mode` is ``"valid"``:
6634
6635        .. math::
6636            \begin{array}{ll} \\
6637                D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) - 1}
6638                {\text{stride[0]}} + 1} \right \rfloor \\
6639                H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) - 1}
6640                {\text{stride[1]}} + 1} \right \rfloor \\
6641                W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) - 1}
6642                {\text{stride[2]}} + 1} \right \rfloor \\
6643            \end{array}
6644
6645        `pad_mode` is ``"pad"``:
6646
6647        .. math::
6648            \begin{array}{ll} \\
6649                D_{out} = \left \lfloor{\frac{D_{in} + pad[0] + pad[1] - \text{dilation[0]} \times
6650                (\text{kernel_size[0]} - 1) - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
6651                H_{out} = \left \lfloor{\frac{H_{in} + pad[2] + pad[3] - \text{dilation[1]} \times
6652                (\text{kernel_size[1]} - 1) - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
6653                W_{out} = \left \lfloor{\frac{W_{in} + pad[4] + pad[5] - \text{dilation[2]} \times
6654                (\text{kernel_size[2]} - 1) - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
6655            \end{array}
6656
6657    Raises:
6658        TypeError: If `out_channel` or `group` is not an int.
6659        TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
6660        ValueError: If `out_channel`, `kernel_size`, `stride` or `dilation` is less than 1.
6661        ValueError: If `pad` is less than 0.
6662        ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
6663        ValueError: If `pad` is a tuple whose length is not equal to 6.
6664        ValueError: If `pad_mode` is not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0, 0, 0).
6665        ValueError: If `data_format` is not 'NCDHW'.
6666
6667    Supported Platforms:
6668        ``Ascend`` ``GPU`` ``CPU``
6669
6670    Examples:
6671        >>> import mindspore
6672        >>> import numpy as np
6673        >>> from mindspore import Tensor, ops
6674        >>> # case 1: specify kernel_size with tuple, all parameters use default values.
6675        >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
6676        >>> weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
6677        >>> conv3d = ops.Conv3D(out_channel=32, kernel_size=(4, 3, 3))
6678        >>> output = conv3d(x, weight)
6679        >>> print(output.shape)
6680        (16, 32, 7, 30, 30)
6681        >>> # case 2: specify kernel_size with int, all parameters use default values.
6682        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
6683        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
6684        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3)
6685        >>> output = conv3d(x, weight)
6686        >>> print(output.shape)
6687        (10, 40, 30, 30, 30)
6688        >>> # case 3: stride=(1, 2, 3), other parameters being default.
6689        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
6690        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
6691        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, stride=(1, 2, 3))
6692        >>> output = conv3d(x, weight)
6693        >>> print(output.shape)
6694        (10, 40, 30, 15, 10)
6695        >>> # case 4: pad_mode="pad", other parameters being default.
6696        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
6697        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
6698        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, pad_mode="pad", pad=2)
6699        >>> output = conv3d(x, weight)
6700        >>> print(output.shape)
6701        (10, 40, 34, 34, 34)
6702        >>> # case 5: dilation=(1, 1, 1), other parameters being default.
6703        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
6704        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
6705        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, dilation=(1, 1, 1))
6706        >>> output = conv3d(x, weight)
6707        >>> print(output.shape)
6708        (10, 40, 30, 30, 30)
6709        >>> # case 6: group=1, other parameters being default.
6710        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
6711        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
6712        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, group=1)
6713        >>> output = conv3d(x, weight)
6714        >>> print(output.shape)
6715        (10, 40, 30, 30, 30)
6716        >>> # case 7: All parameters are specified.
6717        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
6718        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
6719        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, stride=(1, 2, 3), pad_mode="pad",
6720        ...                     pad=2, dilation=(1), group=1)
6721        >>> output = conv3d(x, weight)
6722        >>> print(output.shape)
6723        (10, 40, 34, 17, 12)
6724    """
6725
6726    @prim_attr_register
6727    def __init__(self,
6728                 out_channel,
6729                 kernel_size,
6730                 mode=1,
6731                 pad_mode="valid",
6732                 pad=0,
6733                 stride=1,
6734                 dilation=1,
6735                 group=1,
6736                 data_format="NCDHW"):
6737        """Initialize Conv3D"""
6738        self.init_prim_io_names(inputs=['x', 'w'], outputs=['output'])
6739        self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name)
6740        if isinstance(kernel_size, int):
6741            self.kernel_size = (kernel_size,) * 3
6742        self.add_prim_attr('kernel_size', self.kernel_size)
6743        self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=False, ret_five=True)
6744        self.add_prim_attr('strides', self.stride)
6745        target = context.get_context("device_target")
6746        if target.lower() == "ascend":
6747            self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False,
6748                                                   ret_five=True, third_one=True)
6749        else:
6750            self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False,
6751                                                   ret_five=True, third_one=False)
6752        self.add_prim_attr('dilations', self.dilation)
6753        validator.check_value_type('pad', pad, (int, tuple), self.name)
6754        if isinstance(pad, int):
6755            pad = (pad,) * 6
6756        if len(pad) != 6:
6757            raise ValueError(f"For '{self.name}', attr 'pad' must be a positive int number or a tuple of "
6758                             f"six positive int numbers, but got {self.pad}.")
6759        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
6760        self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name)
6761        self.add_prim_attr('pad_mode', self.pad_mode)
6762
6763        if self.pad_mode != 'pad' and pad != (0, 0, 0, 0, 0, 0):
6764            raise ValueError(f"For '{self.name}', the 'pad' must be zero or (0, 0, 0, 0, 0, 0) when 'pad_mode' "
6765                             f"is not \"pad\", but got 'pad' is {self.pad} and 'pad_mode' is {pad_mode}.")
6766        self.add_prim_attr("pad", pad)
6767        self.padding = pad
6768        if self.pad_mode == 'pad':
6769            for item in pad:
6770                validator.check_non_negative_int(item, 'pad item', self.name)
6771
6772        self.mode = validator.check_equal_int(mode, 1, 'mode', self.name)
6773        self.add_prim_attr('mode', self.mode)
6774        self.format = validator.check_string(data_format, ['NCDHW'], 'data_format', self.name)
6775        self.add_prim_attr('data_format', self.format)
6776        self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
6777        validator.check_value_type("group", group, (int,), self.name)
6778        validator.check_int_range(group, 1, out_channel, validator.INC_BOTH, "group", self.name)
6779        device_target = context.get_context("device_target")
6780        if self.out_channel % group != 0:
6781            raise ValueError("The argument 'out_channel' should be divisible by 'group'")
6782        if device_target == "Ascend" and group != 1:
6783            raise ValueError("On Ascend platform, group = 1 must be satisfied.")
6784
6785        self.group = group
6786        self.add_prim_attr('groups', self.group)
6787        self.add_prim_attr('offset_x', 0)
6788
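
# --- Illustrative sketch (editor's addition, not part of the operator) -----------------
# A hypothetical, framework-free helper that evaluates the pad-mode output-size formula
# documented for Conv3D on one spatial dimension:
# floor((in + pad_a + pad_b - dilation * (kernel - 1) - 1) / stride) + 1.
# It only illustrates the docstring examples; the backend performs the real inference.
def _conv3d_pad_mode_out_dim_sketch(in_dim, kernel, stride=1, dilation=1, pad_a=0, pad_b=0):
    """Output size of one spatial dimension of Conv3D with pad_mode='pad' (illustration only)."""
    return (in_dim + pad_a + pad_b - dilation * (kernel - 1) - 1) // stride + 1

# Example (matches case 7 of the docstring): D_in = H_in = W_in = 32, kernel_size=3,
# stride=(1, 2, 3), pad=2, dilation=1 gives spatial sizes (34, 17, 12), i.e. an output
# of shape (10, 40, 34, 17, 12).
# _conv3d_pad_mode_out_dim_sketch(32, 3, stride=1, pad_a=2, pad_b=2) -> 34
# _conv3d_pad_mode_out_dim_sketch(32, 3, stride=2, pad_a=2, pad_b=2) -> 17
# _conv3d_pad_mode_out_dim_sketch(32, 3, stride=3, pad_a=2, pad_b=2) -> 12
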
6789
6790class Conv3DBackpropInput(Primitive):
6791    """
6792    Computes the gradients of convolution 3D with respect to the input.
6793
6794    Args:
6795        out_channel (int): The dimension of the output.
6796        kernel_size (Union[int, tuple[int]]): The kernel size of the 3D convolution.
6797        mode (int): Modes for different convolutions. Not currently used.
6798        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
6799            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
6800
6801            - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
6802              are the same when `stride` is set to ``1``.
6803              The amount of padding is calculated by the operator internally. If the amount is even,
6804              it is uniformly distributed around the input; if it is odd, the excess amount goes
6805              to the front/right/bottom side.
6806              If this mode is set, `pad` must be 0.
6807            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
6808              possible depth, height and width. Extra pixels that could not complete a full stride will
6809              be discarded. If this mode is set, `pad` must be 0.
6810            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
6811              in the depth, height and width dimension is determined by the `pad` parameter.
6812              If this mode is set, `pad` must be greater than or equal to 0.
6813
6814        pad (Union(int, tuple[int])): The pad value to be filled. Default: ``0`` . If `pad` is an integer, the
6815                    paddings of head, tail, top, bottom, left and right are the same, equal to pad. If `pad` is a
6816                    tuple of six integers, the padding of head, tail, top, bottom, left and right equal to pad[0],
6817                    pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
6818        stride (Union(int, tuple[int])): The stride to be applied to the convolution filter. Default: ``1`` .
6819        dilation (Union(int, tuple[int])): Specifies the space to use between kernel elements. Default: ``1`` .
6820        group (int): Splits input into groups. Default: ``1`` .
6821        data_format (str): The optional value for data format. Currently only ``'NCDHW'`` is supported.
6822
6823    Inputs:
6824        - **weight** (Tensor) - If the kernel size is :math:`(K_d, K_h, K_w)`, then the shape is
6825          :math:`(C_{out}, C_{in}, K_d, K_h, K_w)`. Currently weight data type only supports float16 and float32.
6826        - **dout** (Tensor) - The gradients with respect to the output of the convolution. The shape conforms
6827          to the default data_format :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
6828          Currently dout data type only supports float16 and float32.
6830        - **input_size** (tuple(int)) - A tuple that describes the shape of the input, which conforms to the format
6831          :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
6832
6833    Outputs:
6834        Tensor, the gradients with respect to the input of convolution 3D. It has the same shape as the input.
6835
6836    Raises:
6837        TypeError: If `out_channel` or `group` is not an int.
6838        TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
6839        ValueError: If `out_channel`, `kernel_size`, `stride` or `dilation` is less than 1.
6840        ValueError: If `pad` is less than 0.
6841        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
6842        ValueError: If `pad` is a tuple whose length is not equal to 6.
6843        ValueError: If `pad_mode` is not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0, 0, 0).
6844        ValueError: If `data_format` is not 'NCDHW'.
6845
6846    Supported Platforms:
6847        ``Ascend``
6848
6849    Examples:
6850        >>> import numpy as np
6851        >>> import mindspore
6852        >>> from mindspore import Tensor, ops
6853        >>> dout = Tensor(np.ones([16, 32, 10, 32, 32]), mindspore.float16)
6854        >>> weight = Tensor(np.ones([32, 32, 4, 6, 2]), mindspore.float16)
6855        >>> x = Tensor(np.ones([16, 32, 13, 37, 33]))
6856        >>> conv3d_backprop_input = ops.Conv3DBackpropInput(out_channel=4, kernel_size=(4, 6, 2))
6857        >>> output = conv3d_backprop_input(dout, weight, ops.shape(x))
6858        >>> print(output.shape)
6859        (16, 32, 13, 37, 33)
6860    """
6861
6862    @prim_attr_register
6863    def __init__(self,
6864                 out_channel,
6865                 kernel_size,
6866                 mode=1,
6867                 pad_mode="valid",
6868                 pad=0,
6869                 stride=1,
6870                 dilation=1,
6871                 group=1,
6872                 data_format="NCDHW"):
6873        """Initialize Conv3DBackpropInput"""
6874        self.init_prim_io_names(inputs=['filter', 'out_backprop', 'input_size'], outputs=['y'])
6875        self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
6876        self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name)
6877        self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=True, ret_five=True)
6878        self.add_prim_attr('strides', self.stride)
6879        self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=True, ret_five=True)
6880        self.add_prim_attr('dilations', self.dilation)
6881        validator.check_value_type('pad', pad, (int, tuple), self.name)
6882        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
6883        if isinstance(pad, int):
6884            pad = (pad,) * 6
6885        validator.check_equal_int(len(pad), 6, 'pad size', self.name)
6886        self.add_prim_attr("pad", pad)
6887        self.pad_list = pad
6888
6889        self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name)
6890        if self.pad_mode != 'pad' and self.pad_list != (0, 0, 0, 0, 0, 0):
6891            raise ValueError(f"For '{self.name}', the 'pad' must be (0, 0, 0, 0, 0, 0) "
6892                             f"when 'pad_mode' is not \"pad\", "
6893                             f"but got 'pad' is {self.pad_list} and 'pad_mode' is {self.pad_mode}.")
6894        if self.pad_mode == 'pad':
6895            for item in pad:
6896                validator.check_non_negative_int(item, 'pad item', self.name)
6897        self.add_prim_attr('pad_mode', self.pad_mode)
6898
6899        self.mode = validator.check_equal_int(mode, 1, 'mode', self.name)
6900        self.add_prim_attr('mode', self.mode)
6901        self.group = validator.check_positive_int(group, 'group', self.name)
6902        self.add_prim_attr('groups', self.group)
6903        self.format = validator.check_string(data_format, ['NCDHW'], 'format', self.name)
6904        self.add_prim_attr('data_format', self.format)
6905
6906
6907def _deconv_output_length(input_length, kernel_size, stride_size, dilation_size):
6908    filter_size = kernel_size + (kernel_size - 1) * (dilation_size - 1)
6909    if filter_size - stride_size > 0:
6910        length = input_length * stride_size + filter_size - stride_size
6911    else:
6912        length = input_length * stride_size
6913    return length
6914
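# Worked example for _deconv_output_length (editor's note): with kernel_size=4, stride=1 and
# dilation=1 the effective filter size is 4, so an input length of 10 maps back to
# 10 * 1 + 4 - 1 = 13; with kernel sizes 6 and 2 the lengths 32 and 32 map to 37 and 33.
# These are exactly the spatial sizes recovered in the Conv3DBackpropInput and
# Conv3DTranspose examples ((16, 32, 13, 37, 33) and (32, 3, 13, 37, 33)).
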
6915
6916class SparseApplyAdadelta(Primitive):
6917    r"""
6918    Updates relevant entries according to the adadelta scheme.
6919
6920    .. math::
6921            \begin{array}{ll} \\
6922                accum = \rho * accum + (1 - \rho) * grad^2 \\
6923                \text{update} = \sqrt{\text{accum_update} + \epsilon} * \frac{grad}{\sqrt{accum + \epsilon}} \\
6924                var = var - \text{update} * lr \\
6925                \text{accum_update} = \rho * \text{accum_update} + (1 - \rho) * \text{update}^2 \\
6926            \end{array}
6927
6928    Inputs of `var`, `accum`, `accum_update` and `grad` comply with the implicit type conversion rules
6929    to make the data types consistent. Besides, the inputs `lr` and `rho` also support implicit type conversion.
6930    If they have different data types, the lower-priority data type will be converted to the
6931    relatively highest-priority data type.
6932    A RuntimeError exception will be thrown when data type conversion of a Parameter is required.
6933
6934    Note:
6935        If there are negative values or values greater than or equal to var.shape[0] in `indices`,
6936        the behavior is undefined. Besides, this operator doesn't support duplicates in `indices`.
6937
6938    Args:
6939        epsilon (float): A small value added for numerical stability. Its value must be greater than or equal to 0.
6940        use_locking (bool): If ``True`` , the `var` and `accum` tensors will be protected from being updated.
6941            Default: ``False`` .
6942
6943    Inputs:
6944        - **var** (Parameter) - Weights to be updated. With float32 or float16 data type.
6945        - **accum** (Parameter) - Accumulation to be updated. Must have the same shape and dtype as `var`.
6946          With float32 or float16 data type.
6947        - **accum_update** (Parameter) - Accum_update to be updated. Must have the same shape and dtype as `var`.
6948          With float32 or float16 data type.
6949        - **lr** (Union[float, Tensor]) - Learning rate, must be a scalar. With float32 or float16 data type.
6950        - **rho** (Union[float, Tensor]) - Decay rate, must be a scalar. With float32 or float16 data type.
6951        - **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`.
6952        - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
6953          Must be one of the following types: int32, int64, and indices.shape[0] must equal grad.shape[0].
6954
6955    Outputs:
6956        Tuple of 3 Tensor, the updated parameters.
6957
6958        - **var** (Tensor) - The same shape and data type as `var`.
6959        - **accum** (Tensor) - The same shape and data type as `accum`.
6960        - **accum_update** (Tensor) - The same shape and data type as `accum_update`.
6961
6962    Raises:
6963        TypeError: If `epsilon` is not a float.
6964        TypeError: If `use_locking` is not a bool.
6965        TypeError: If `var`, `accum` or `accum_update` is not a Parameter.
6966        TypeError: If dtype of `accum`, `accum_update` or `grad` is not the same as `var`.
6967        TypeError: If dtype of `var`, `accum`, `accum_update`, `lr`, `rho` or `grad` is neither float16 nor
6968                   float32.
6969        TypeError: If dtype of `indices` is neither int32 nor int64.
6970        ValueError: If `epsilon` is less than 0.
6971        ValueError: If the shape of `accum`, `accum_update` or `grad` is not the same as `var`.
6972        ValueError: If the rank of `indices` is not equal to 1.
6973        ValueError: If shape of `indices` is not same as shape of first dimension of `grad`.
6974
6975    Supported Platforms:
6976        ``Ascend``
6977
6978    Examples:
6979        >>> import numpy as np
6980        >>> import mindspore.nn as nn
6981        >>> from mindspore import Tensor, Parameter
6982        >>> from mindspore.ops import operations as P
6983        >>> class Net(nn.Cell):
6980        ...     def __init__(self,epsilon,use_locking = False):
6981        ...         super(Net, self).__init__()
6982        ...         self.sparse_apply_adadelta = P.SparseApplyAdadelta(epsilon,use_locking)
6983        ...         self.var = Parameter(Tensor(np.array([[1.0,2.0],[2.0,3.0]]).astype(np.float32)), name="var")
6984        ...         self.accum = Parameter(Tensor(np.array([[1.5,2.5],[3.5,4.5]]).astype(np.float32)), name="accum")
6985        ...         self.accum_update = Parameter(Tensor(np.array([[1.2,2.4],[1.8,0.6]]).astype(np.float32)),
6986        ...                name="accum_update")
6987        ...     def construct(self, lr, rho, grad, indices):
6988        ...         out = self.sparse_apply_adadelta(self.var, self.accum, self.accum_update, lr, rho, grad, indices)
6989        ...         return out
6990        ...
6991        >>> epsilon = 1e-6
6992        >>> net = Net(epsilon)
6993        >>> lr = 0.01
6994        >>> rho = 0.2
6995        >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
6996        >>> output = net(lr, rho, grad, Tensor(np.array([0,1],dtype=np.int32)))
6997        >>> print(output)
6998        (Tensor(shape=[2, 2], dtype=Float32, value=
6999        [[ 9.94611859e-01,  1.98851788e+00],
7000         [ 1.99840558e+00,  2.99478507e+00]]), Tensor(shape=[2, 2], dtype=Float32, value=
7001        [[ 3.72000009e-01,  8.91999960e-01],
7002         [ 7.08000004e-01,  1.41200006e+00]]), Tensor(shape=[2, 2], dtype=Float32, value=
7003        [[ 4.72257614e-01,  1.53470778e+00],
7004         [ 3.80338937e-01,  3.37563992e-01]]))
7005    """
7006
7007    __mindspore_signature__ = (
7008        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7009        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7010        sig.make_sig('accum_updata', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7011        sig.make_sig('lr', dtype=sig.sig_dtype.T1),
7012        sig.make_sig('rho', dtype=sig.sig_dtype.T1),
7013        sig.make_sig('grad', dtype=sig.sig_dtype.T),
7014        sig.make_sig('indices', dtype=sig.sig_dtype.T2),
7015    )
7016
7017    @prim_attr_register
7018    def __init__(self, epsilon, use_locking=False):
7019        """Initialize SparseApplyAdadelta"""
7020        validator.check_value_type("epsilon", epsilon, [float], self.name)
7021        validator.check_number("epsilon", epsilon, 0.0, validator.GE, self.name)
7022        validator.check_value_type("use_locking", use_locking, [bool], self.name)
7023
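
# --- Illustrative sketch (editor's addition, not part of the operator) -----------------
# A hypothetical NumPy reference for a single SparseApplyAdadelta step, following the
# update equations in the docstring above. Only the rows selected by `indices` are
# touched; the real operator updates the Parameters in place on device.
def _sparse_adadelta_step_sketch(var, accum, accum_update, lr, rho, epsilon, grad, indices):
    """Return updated copies of (var, accum, accum_update) for the selected rows (illustration only)."""
    import numpy as np
    var, accum, accum_update = (np.array(x, dtype=np.float32) for x in (var, accum, accum_update))
    grad = np.array(grad, dtype=np.float32)
    for row, g in zip(indices, grad):
        accum[row] = rho * accum[row] + (1.0 - rho) * g * g
        update = np.sqrt(accum_update[row] + epsilon) * g / np.sqrt(accum[row] + epsilon)
        var[row] = var[row] - lr * update
        accum_update[row] = rho * accum_update[row] + (1.0 - rho) * update * update
    return var, accum, accum_update

# With the docstring example (lr=0.01, rho=0.2, epsilon=1e-6, indices=[0, 1]) this reproduces
# the printed result, e.g. var[0][0] is about 0.9946119 and accum[0][0] is 0.372.
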
7024
7025class CTCLossV2(Primitive):
7026    """
7027    Calculates the CTC (Connectionist Temporal Classification) loss and the gradient.
7028
7029    The CTC algorithm is proposed in `Connectionist Temporal Classification: Labeling Unsegmented Sequence Data with
7030    Recurrent Neural Networks <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_.
7031
7032    .. warning::
7033        This is an experimental API that is subject to change or deletion.
7034
7035    Args:
7036        blank (int, optional): The blank label. Default: ``0`` .
7037        reduction (str, optional): Apply specific reduction method to the output. Currently only supports ``'none'``.
7038            Default: ``'none'`` .
7040        zero_infinity (bool, optional): If loss is infinite, this parameter determines whether to set that loss
7041            and its correlated gradient to zero. Default: ``False`` .
7042
7043    Inputs:
7044        - **log_probs** (Tensor) - A tensor of shape :math:`(T, N, C)`, where :math:`T` is input length, :math:`N` is
7045          batch size and :math:`C` is number of classes (including blank). Supported dtypes: float32, float64.
7046        - **targets** (Tensor) - A tensor of shape :math:`(N, S)`, where :math:`S` is max target length,
7047          means the target sequences. Supported dtypes: int32, int64.
7048        - **input_lengths** (Union(Tuple, Tensor)) - A tuple or Tensor of shape :math:`(N)`.
7049          It means the lengths of the input. Supported dtypes: int32, int64.
7050        - **target_lengths** (Union(Tuple, Tensor)) - A tuple or Tensor of shape :math:`(N)`.
7051          It means the lengths of the target. Supported dtypes: int32, int64.
7052
7053    Outputs:
7054        - **neg_log_likelihood** (Tensor) - A loss value which is differentiable with respect to each input node.
7055        - **log_alpha** (Tensor) - The probability of possible trace of input to target.
7056
7057    Raises:
7058        TypeError: If `zero_infinity` is not a bool.
7059        TypeError: If `reduction` is not a string.
7060        TypeError: If the dtype of `log_probs` is not float or double.
7061        TypeError: If the dtype of `targets`, `input_lengths` or `target_lengths` is not int32 or int64.
7062        ValueError: If the rank of `log_probs` is not 3.
7063        ValueError: If the rank of `targets` is not 2.
7064        ValueError: If the shape of `input_lengths` does not match batch_size :math:`N`.
7065        ValueError: If the shape of `target_lengths` does not match batch_size :math:`N`.
7066        TypeError: If the types of `targets`, `input_lengths` or `target_lengths` are different.
7067        ValueError: If the value of `blank` is not in range [0, C).
7068        RuntimeError: If any value of `input_lengths` is larger than (num_labels|C).
7069        RuntimeError: If any `target_lengths[i]` is not in range [0, `input_length[i]`].
7070
7071    Supported Platforms:
7072        ``Ascend`` ``GPU`` ``CPU``
7073
7074    Examples:
7075        >>> import numpy as np
7076        >>> from mindspore import Tensor, ops
7077        >>> from mindspore import dtype as mstype
7078        >>> log_probs = Tensor(np.array([[[0.3, 0.6, 0.6]],
7079        ...                              [[0.9, 0.4, 0.2]]]).astype(np.float32))
7080        >>> targets = Tensor(np.array([[0, 1]]), mstype.int32)
7081        >>> input_lengths = Tensor(np.array([2]), mstype.int32)
7082        >>> target_lengths = Tensor(np.array([1]), mstype.int32)
7083        >>> CTCLossV2 = ops.CTCLossV2(blank=0, reduction='none', zero_infinity=False)
7084        >>> neg_log_hood, log_alpha = CTCLossV2(
7085        ...     log_probs, targets, input_lengths, target_lengths)
7086        >>> print(neg_log_hood)
7087        [-2.2986124]
7088        >>> print(log_alpha)
7089        [[[0.3       0.3            -inf      -inf      -inf]
7090          [1.2       1.8931472 1.2            -inf      -inf]]]
7091    """
7092
7093    @prim_attr_register
7094    def __init__(self, blank=0, reduction="none", zero_infinity=False):
7095        """Initialize CTCLossV2"""
7096        self.init_prim_io_names(inputs=["log_probs", "targets", "input_lengths", "target_lengths"],
7097                                outputs=["neg_log_likelihood", "log_alpha"])
7098        validator.check_value_type("blank", blank, [int], self.name)
7099        self.add_prim_attr("blank", blank)
7100        validator.check_value_type("reduction", reduction, [str], self.name)
7101        self.reduction = reduction.lower()
7102        validator.check_string(self.reduction, ['none'], 'reduction', self.name)
7103        self.add_prim_attr("reduction", self.reduction)
7104        validator.check_value_type("zero_infinity", zero_infinity, [bool], self.name)
7105        self.add_prim_attr("zero_infinity", zero_infinity)
7106
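
# --- Illustrative sketch (editor's addition, not part of the operator) -----------------
# A hypothetical NumPy reference for the forward (alpha) recursion that CTCLossV2
# evaluates for a single batch element, following Graves et al. (2006). It is only a
# readability aid; the backend kernel also returns the padded `log_alpha` tensor and
# supports batching, zero_infinity and the gradient pass.
def _ctc_forward_sketch(log_probs, target, blank=0):
    """log_probs: (T, C) array for one sequence; target: label list without blanks (illustration only)."""
    import numpy as np
    log_probs = np.asarray(log_probs, dtype=np.float64)
    ext = [blank]
    for label in target:                      # extended target l' = [b, l1, b, l2, b, ...]
        ext.extend((label, blank))
    seq_len, ext_len = log_probs.shape[0], len(ext)
    alpha = np.full((seq_len, ext_len), -np.inf)
    alpha[0, 0] = log_probs[0, blank]
    if ext_len > 1:
        alpha[0, 1] = log_probs[0, ext[1]]
    for t in range(1, seq_len):
        for s in range(ext_len):
            cands = [alpha[t - 1, s]]
            if s > 0:
                cands.append(alpha[t - 1, s - 1])
            if s > 1 and ext[s] != blank and ext[s] != ext[s - 2]:
                cands.append(alpha[t - 1, s - 2])
            alpha[t, s] = np.logaddexp.reduce(cands) + log_probs[t, ext[s]]
    neg_log_likelihood = -np.logaddexp(alpha[-1, -1], alpha[-1, -2])
    return neg_log_likelihood, alpha

# With the docstring inputs above (T=2, C=3, target=[0]) this yields about -2.2986 and alpha
# rows [0.3, 0.3, -inf] and [1.2, 1.8931, 1.2], matching `neg_log_hood` and the first three
# columns of the padded `log_alpha`.
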
7107
7108class CTCLossV2Grad(Primitive):
7109    """
7110    Calculates the gradient of CTC (Connectionist Temporal Classification) loss.
7111
7112    The CTC algorithm is proposed in `Connectionist Temporal Classification: Labeling Unsegmented Sequence Data with
7113    Recurrent Neural Networks <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_.
7114
7115    Args:
7116        blank (int): The blank label. Default: ``0`` .
7117        reduction (string): Apply specific reduction method to the output. Currently only supports 'none'.
7118            Default: ``"none"`` .
7119        zero_infinity (bool): Whether to set infinite loss and correlation gradient to zero. Default: ``False`` .
7120
7121    Inputs:
7122        - **grad_out** (Tensor) - Gradient renewal coefficient, a tensor of shape (N), where N is batch size.
7123        - **log_probs** (Tensor) - A tensor of shape (T, N, C), where T is input length, N is batch size and C is number
7124          of classes (including blank).
7125        - **targets** (Tensor) - A tensor of shape (N, S), where S is max target length, means the target sequences.
7126        - **input_lengths** (Union(tuple, Tensor)) - A tuple or Tensor of shape(N). It means the lengths of the input.
7127        - **target_lengths** (Union(tuple, Tensor)) - A tuple or Tensor of shape(N). It means the lengths of the target.
7128        - **log_alpha** (Tensor) - The probability of possible trace of input to target.
7129        - **neg_log_likelihood** (Tensor) - A loss value which is differentiable with respect to each input node.
7130
7131    Outputs:
7132        - **grad** (Tensor) - The grad of Connectionist Temporal Classification Loss.
7133
7134    Raises:
7135        TypeError: If `zero_infinity` is not a bool or `reduction` is not a string.
7136        TypeError: If the dtype of `log_probs` or `grad_out` is not float or double.
7137        TypeError: If the dtype of `targets`, `input_lengths` or `target_lengths` is not int32 or int64.
7138        RuntimeError: If the rank of `log_probs` is not 3.
7139        RuntimeError: If the rank of `targets` is not 2.
7140        RuntimeError: If the shape of `input_lengths` does not match {batch_size|N}.
7141        RuntimeError: If the shape of `target_lengths` does not match {batch_size|N}.
7142        RuntimeError: If the types of `targets`, `input_lengths`, `grad_out` or `target_lengths` are different.
7143        RuntimeError: If the value of `blank` is not in range [0, num_labels|C).
7144        RuntimeError: If any value of `input_lengths` is larger than (num_labels|C).
7145        RuntimeError: If any target_lengths[i] is not in range [0, input_length[i]].
7146
7147    Supported Platforms:
7148        ``Ascend`` ``CPU``
7149    """
7150
7151    @prim_attr_register
7152    def __init__(self, blank, reduction="none", zero_infinity=False):
7153        """Initialize CTCLossV2Grad"""
7154        self.init_prim_io_names(inputs=["grad_out", "log_probs", "targets", "input_lengths", "target_lengths",
7155                                        "neg_log_likelihood", "log_alpha"],
7156                                outputs=["grad"])
7157        validator.check_value_type("blank", blank, [int], self.name)
7158        self.add_prim_attr("blank", blank)
7159        validator.check_value_type("reduction", reduction, [str], self.name)
7160        self.add_prim_attr("reduction", reduction)
7161        validator.check_value_type("zero_infinity", zero_infinity, [bool], self.name)
7162        self.add_prim_attr("zero_infinity", zero_infinity)
7163
7164
7165class Conv3DTranspose(Primitive):
7166    r"""
7167    Computes a 3D transposed convolution, which is also known as a deconvolution
7168    (although it is not an actual deconvolution).
7169
7170    Input is typically of shape :math:`(N, C, D, H, W)`, where :math:`N` is batch size, :math:`C` is channel number,
7171    :math:`D` is depth, :math:`H` is height, :math:`W` is width.
7172
7173    If the 'pad_mode' is set to be "pad", the depth, height and width of output are defined as:
7174
7175    .. math::
7176        D_{out} = (D_{in} - 1) \times \text{stride}[0] - 2 \times \text{pad}[0] + \text{dilation}[0]
7177        \times (\text{kernel_size}[0] - 1) + \text{output_padding}[0] + 1
7178
7179        H_{out} = (H_{in} - 1) \times \text{stride}[1] - 2 \times \text{pad}[1] + \text{dilation}[1]
7180        \times (\text{kernel_size}[1] - 1) + \text{output_padding}[1] + 1
7181
7182        W_{out} = (W_{in} - 1) \times \text{stride}[2] - 2 \times \text{pad}[2] + \text{dilation}[2]
7183        \times (\text{kernel_size}[2] - 1) + \text{output_padding}[2] + 1
7184
7185    Note:
7186        On Ascend, only :math:`group=1` is supported.
7187
7188    Args:
7189        in_channel (int): The channel of the input x.
7190        out_channel (int): The channel of the output.
7191        kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers.
7192            Specifies the depth, height and width of the 3D convolution window.
7193            Single int means the value is for the depth, height and width of the kernel.
7194            A tuple of 3 ints means the first value is for the depth, the second value is for the height and the
7195            other is for the width of the kernel.
7196        mode (int, optional): Modes for different convolutions. Default is ``1`` . It is currently not used.
7197        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
7198            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
7199
7200            - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
7201              are the same when `stride` is set to ``1``.
7202              The amount of padding is calculated by the operator internally. If the amount is even,
7203              it is uniformly distributed around the input; if it is odd, the excess amount goes
7204              to the front/right/bottom side.
7205              If this mode is set, `pad` must be 0.
7206            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
7207              possible depth, height and width. Extra pixels that could not complete a full stride will
7208              be discarded. If this mode is set, `pad` must be 0.
7209            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
7210              in the depth, height and width dimension is determined by the `pad` parameter.
7211              If this mode is set, `pad` must be greater than or equal to 0.
7212
7213        pad (Union(int, tuple[int]), optional): The pad value to be filled. Default: ``0`` . If `pad` is an integer,
7214            the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
7215            If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal
7216            to pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
7217        stride (Union(int, tuple[int]), optional): The distance of kernel moving: an int number that applies
7218            to the depth, height and width of movement, or a tuple of three int numbers that
7219            represent depth, height and width of movement respectively. Default: ``1`` .
7220        dilation (Union(int, tuple[int]), optional): Specifies the space to use between kernel elements.
7221            Default: ``1`` .
7222        group (int, optional): The number of groups into which the filter is divided. `in_channels`
7223            and `out_channels` must be divisible by `group`. Default: ``1`` .
7224        output_padding (Union(int, tuple[int]), optional): Add extra size to each dimension of the output.
7225            Default: ``0`` .
7226        data_format (str, optional): The optional value for data format. Currently only ``'NCDHW'`` is supported.
7227            Default: ``'NCDHW'``.
7228
7229    Inputs:
7230        - **dout** (Tensor) - The gradients with respect to the output of the convolution.
7231          The shape conforms to the default.
7232          data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`. Currently dout data type only supports float16
7233          and float32.
7234        - **weight** (Tensor) - If the kernel size is :math:`(K_d, K_h, K_w)`, then the shape is
7235          :math:`(C_{in}, C_{out}//group, K_d, K_h, K_w)`, where :math:`group` is the Args parameter
7236          and :math:`//` is the symbol for integer division.
7237          Currently weight data type only supports float16 and float32.
7238        - **bias** (Tensor) - Tensor of shape :math:`C_{out}`. Currently, only None is supported. Default: ``None`` .
7239
7240    Outputs:
7241        Tensor, the gradients with respect to the input of convolution 3D.
7242        Tensor of shape :math:`(N, C_{out}//group, D_{out}, H_{out}, W_{out})`,
7243        where :math:`group` is the Args parameter.
7244
7245    Raises:
7246        TypeError: If `in_channel`, `out_channel` or `group` is not an int.
7247        TypeError: If `kernel_size`, `stride`, `pad`, `dilation` or `output_padding` is neither an int nor a tuple.
7248        ValueError: If `in_channel`, `out_channel`, `kernel_size`, `stride` or `dilation` is less than 1.
7249        ValueError: If `pad` is less than 0.
7250        ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
7251        ValueError: If `pad` is a tuple whose length is not equal to 6.
7252        ValueError: If `pad_mode` is not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0, 0, 0).
7253        ValueError: If `data_format` is not 'NCDHW'.
7254        TypeError: If data type of dout and weight is neither float16 nor float32.
7255        ValueError: If `bias` is not None, or the rank of `dout` or `weight` is not equal to 5.
7256
7257    Supported Platforms:
7258        ``Ascend`` ``GPU`` ``CPU``
7259
7260    Examples:
7261        >>> import mindspore
7262        >>> import numpy as np
7263        >>> from mindspore import Tensor, ops
7264        >>> dout = Tensor(np.ones([32, 16, 10, 32, 32]), mindspore.float16)
7265        >>> weight = Tensor(np.ones([16, 3, 4, 6, 2]), mindspore.float16)
7266        >>> conv3d_transpose = ops.Conv3DTranspose(in_channel=16, out_channel=3, kernel_size=(4, 6, 2))
7267        >>> output = conv3d_transpose(dout, weight)
7268        >>> print(output.shape)
7269        (32, 3, 13, 37, 33)
7270    """
7271
7272    @prim_attr_register
7273    def __init__(self,
7274                 in_channel,
7275                 out_channel,
7276                 kernel_size,
7277                 mode=1,
7278                 pad_mode='valid',
7279                 pad=0,
7280                 stride=1,
7281                 dilation=1,
7282                 group=1,
7283                 output_padding=0,
7284                 data_format="NCDHW"):
7285        """Initialize Conv3DTranspose"""
7286        self.init_prim_io_names(inputs=['x', 'filter'], outputs=['output'])
7287        self.in_channel = validator.check_positive_int(in_channel, 'in_channel', self.name)
7288        self.add_prim_attr('in_channel', self.in_channel)
7289        self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
7290        self.add_prim_attr('out_channel', self.out_channel)
7291        self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name)
7292        if isinstance(kernel_size, int):
7293            self.kernel_size = (kernel_size,) * 3
7294        self.add_prim_attr('kernel_size', self.kernel_size)
7295        self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=False,
7296                                             ret_five=True)
7297        self.add_prim_attr('strides', self.stride)
7298        self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False,
7299                                               ret_five=True, third_one=True)
7300        self.add_prim_attr('dilations', self.dilation)
7301        validator.check_value_type('pad', pad, (int, tuple), self.name)
7302        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
7303        if isinstance(pad, int):
7304            pad = (pad,) * 6
7305        if len(pad) != 6:
7306            raise ValueError(f"For '{self.name}', attr 'pad' must be a positive int number or a tuple of "
7307                             f"six positive int numbers, but got {self.pad}.")
7308        self.pad_list = pad
7309        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
7310        self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name)
7311        self.add_prim_attr('pad_mode', self.pad_mode)
7312
7313        if self.pad_mode != 'pad' and pad != (0, 0, 0, 0, 0, 0):
7314            raise ValueError(f"For '{self.name}', the 'pad' must be zero or (0, 0, 0, 0, 0, 0) when 'pad_mode' "
7315                             f"is not \"pad\", but got 'pad' is {self.pad} and 'pad_mode' is {pad_mode}.")
7316
7317        if self.pad_mode == 'pad':
7318            for item in self.pad_list:
7319                validator.check_non_negative_int(item, 'pad item', self.name)
7320        self.add_prim_attr('pad_list', self.pad_list)
7321        self.mode = validator.check_equal_int(mode, 1, 'mode', self.name)
7322        self.add_prim_attr('mode', self.mode)
7323        validator.check_value_type("group", group, (int,), self.name)
7324        validator.check_int_range(group, 1, out_channel, validator.INC_BOTH, "group", self.name)
7325        if self.out_channel % group != 0:
7326            raise ValueError("The argument 'out_channel' should be divisible by 'group'")
7327        device_target = context.get_context("device_target")
7328        if device_target == "Ascend" and group != 1:
7329            raise ValueError("On Ascend platform, group = 1 must be satisfied.")
7330        self.group = group
7331        self.add_prim_attr('groups', self.group)
7332
7333        self.format = validator.check_string(data_format, ['NCDHW'], 'format', self.name)
7334        self.add_prim_attr('data_format', self.format)
7335
7336        self.output_padding = _check_3d_int_or_tuple('output_padding', output_padding, self.name,
7337                                                     allow_five=False, ret_five=True, greater_zero=False)
7338        output_padding_ = (self.output_padding[2], self.output_padding[3], self.output_padding[4])
7339        if self.pad_mode != 'pad' and output_padding_ != (0, 0, 0):
7340            raise ValueError(f"For '{self.name}', the 'output_padding' must be zero or (0, 0, 0) "
7341                             f"when 'pad_mode' is not \"pad\", but got 'output_padding' is "
7342                             f"{output_padding} and 'pad_mode' is {pad_mode}.")
7343        self.add_prim_attr('output_padding', self.output_padding)
7344        validator.check_int_range(self.kernel_size[0] * self.kernel_size[1] * self.kernel_size[2],
7345                                  1, 343, validator.INC_BOTH,
7346                                  'The product of height, width and depth of kernel_size belonging [1, 343]',
7347                                  self.name)
7348        validator.check_int_range(self.stride[0] * self.stride[1] * self.stride[2], 1, 343, validator.INC_BOTH,
7349                                  'The product of height, width and depth of stride belonging [1, 343]', self.name)
7350        validator.check_int_range(self.stride[1] * self.stride[2], 1, 256, validator.INC_BOTH,
7351                                  'The product of height, width and depth of stride belonging [1, 256]', self.name)
7352        validator.check_int_range(self.output_padding[2], 0, max(self.dilation[2], self.stride[2]), validator.INC_LEFT,
7353                                  'output_padding_d belonging [0, max(stride_d, dilation_d))', self.name)
7354        validator.check_int_range(self.output_padding[3], 0, max(self.dilation[3], self.stride[3]), validator.INC_LEFT,
7355                                  'output_padding_h belonging [0, max(stride_h,dilation_h))', self.name)
7356        validator.check_int_range(self.output_padding[4], 0, max(self.dilation[4], self.stride[4]), validator.INC_LEFT,
7357                                  'output_padding_w belonging [0, max(stride_w,dilation_w))', self.name)
7358
7359
7360class Dilation2D(Primitive):
7361    r"""
7362    Computes the grayscale dilation of 4-D input and 3-D filters tensors.
7363
7364    Applies a 2D dilation over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
7365    where :math:`N` is batch size, :math:`H` is height, :math:`W` is width, :math:`C` is channel number.
7366    Given kernel size :math:`ks = (h_{ker}, w_{ker})`, stride :math:`s = (s_0, s_1)` and
7367    dilation :math:`d = (d_0, d_1)`, the operation is as follows:
7368
7369    .. math::
7370        \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
7371        \text{input}(N_i, C_j, s_0 \times h + d_0 \times m, s_1 \times w + d_1 \times n) + \text{filter}(C_j, m, n)
7372
7373    .. warning::
7374        This is an experimental API that is subject to change or deletion.
7375
7376    Note:
7377        If the input data type is float32, this operator is still executed in float16 mode.
7378
7379    Args:
7380        stride (Union(int, tuple[int])): The distance of kernel moving. It can be an int number that
7381            represents both the height and width of movement, a tuple of two int numbers that represent
7382            the height and width of movement respectively, or a tuple of four int numbers of the form
7383            [1, 1, stride_height, stride_width] when data_format is 'NCHW'.
7384
7385        dilation (Union(int, tuple[int])): The data type is int or a tuple of 2 integers or a tuple of 4 integers.
7386                                      Specifies the dilation rate to use for dilated convolution.
7387                                      If set to be :math:`k > 1`, there will be :math:`k - 1` pixels skipped for
7388                                      each sampling location. Its value must be greater than or equal to 1 and bounded by
7389                                      the height and width of the input `x`.
7390
7391        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
7392            ``"same"`` or ``"valid"`` . Default: ``"SAME"`` .
7393
7394            - ``"same"``: Pad the input around its edges so that the shape of input and output
7395              are the same when `stride` is set to ``1``.
7396              The amount of padding is calculated by the operator internally. If the amount is even, it is
7397              uniformly distributed around the input; if it is odd, the excess padding goes to the right/bottom side.
7398            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
7399              possible height and width. Extra pixels that could not complete a full stride will
7400              be discarded.
7401
7402        data_format (str, optional): The value for data format, only ``'NCHW'`` is supported at present.
7403            Default: ``"NCHW"`` .
7404
7405    Inputs:
7406        - **x** (Tensor) - Input data. A 4-D Tensor, its shape must be
7407          :math:`(N, C_{in}, H_{in}, W_{in})`.
7408        - **filter** (Tensor) - A three dimension tensor with the same type as input. The shape must be
7409          :math:`(C_{in}, H_{filter}, W_{filter})`.
7410
7411    Outputs:
7412        Tensor, the value that applied 2D dilation. The shape is :math:`(N, C_{out}, H_{out}, W_{out})` which
7413        is not necessarily the same as the input x, the type is the same as the input x.
7414
7415    Raises:
7416        TypeError: If the data type of `x` or `filter` is not one of: uint8, uint16, uint32, uint64, int8, int16,
7417                                  int32, int64, float16, float32, float64.
7418        TypeError: If `stride` or `dilation` is not an int number or a tuple of two or four int numbers.
7419        ValueError: If the length of `stride` or `dilation` is neither two nor four when they are tuple.
7420        ValueError: If `stride` or `dilation` shape is not (1, 1, height, width) when it is a tuple of four int numbers.
7421        ValueError: If `stride` is not in the range of [1, 255].
7422        ValueError: If `dilation` is less than 1.
7423        ValueError: If `pad_mode` is not a str of 'same', 'valid', 'SAME' or 'VALID'.
7424        ValueError: If `data_format` is not the str of 'NCHW'.
7425
7426    Supported Platforms:
7427        ``GPU`` ``CPU``
7428
7429    Examples:
7430        >>> x = Tensor(np.ones([10, 5, 32, 32]), mindspore.float16)
7431        >>> filter = Tensor(np.ones([5, 3, 3]), mindspore.float16)
7432        >>> dilation2d = ops.Dilation2D(stride=1, dilation=1, pad_mode='VALID')
7433        >>> output = dilation2d(x, filter)
7434        >>> print(output.shape)
7435        (10, 5, 30, 30)
7436    """
7437
7438    @prim_attr_register
7439    def __init__(self, stride, dilation, pad_mode="SAME", data_format="NCHW"):
7440        """Initialize Dilation2D."""
7441        self.init_prim_io_names(inputs=['x', 'filter'], outputs=['y'])
7442
7443        def _check_format_stride_or_dilation(arg_name, arg_value, prim_name, data_format):
7444            validator.check_value_type(arg_name, arg_value, (int, tuple), prim_name)
7445            if isinstance(arg_value, int):
7446                ret_value = (1, arg_value, arg_value, 1) if data_format == "NHWC" else (1, 1, arg_value, arg_value)
7447            elif len(arg_value) == 2:
7448                ret_value = (1, arg_value[0], arg_value[1], 1) if data_format == "NHWC" else \
7449                    (1, 1, arg_value[0], arg_value[1])
7450            elif len(arg_value) == 4:
7451                if data_format == "NHWC" and (arg_value[0] != 1 or arg_value[3] != 1):
7452                    raise ValueError(
7453                        f"For '{prim_name}' attr '{arg_name}' should be [1, {arg_name}_height, {arg_name}_width, 1] "
7454                        f"when data_format is 'NHWC', but got {arg_value}")
7455                if data_format == "NCHW" and (arg_value[0] != 1 or arg_value[1] != 1):
7456                    raise ValueError(
7457                        f"For '{prim_name}' attr '{arg_name}' should be [1, 1, {arg_name}_height, {arg_name}_width] "
7458                        f"when data_format is 'NCHW', but got {arg_value}")
7459                ret_value = arg_value
7460            else:
7461                raise ValueError(
7462                    f"For '{prim_name}' attr '{arg_name}' should be a positive int number or a tuple of two "
7463                    f"or four positive int numbers, but got {arg_value}")
7464            for item in ret_value:
7465                if isinstance(item, int) and not isinstance(item, bool) and item > 0:
7466                    continue
7467                raise ValueError(
7468                    f"For '{prim_name}' attr '{arg_name}' should be a positive int number or a tuple of two "
7469                    f"or four positive int numbers, but got {arg_value}")
7470            return ret_value
7471
7472        if data_format == 'NHWC':
7473            raise ValueError(f"For '{self.name}', NHWC format is not supported at present.")
7474        self.data_format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'data_format', self.name)
7475        self.add_prim_attr('data_format', self.data_format)
7476        self.pad_mode = validator.check_string(pad_mode, ['VALID', 'SAME', 'valid', 'same'], 'pad_mode', self.name)
7477        self.add_prim_attr('pad_mode', self.pad_mode.upper())
7478        self.stride = _check_format_stride_or_dilation("stride", stride, self.name, self.data_format)
7479
7480        def is_in_range(x):
7481            return 1 <= x <= 255
7482
7483        if not is_in_range(self.stride[2]) or not is_in_range(self.stride[3]):
7484            raise ValueError(f'For Dilation2D, size of stride is not supported, '
7485                             f'stride should be in the range of [1, 255], '
7486                             f'but got stride_h: `{self.stride[2]}`, stride_w: `{self.stride[3]}`.')
7487        self.add_prim_attr('stride', self.stride)
7488        self.dilation = _check_format_stride_or_dilation("dilation", dilation, self.name, self.data_format)
7489        self.add_prim_attr('dilation', self.dilation)
7490
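# --- Illustrative sketch (not part of the MindSpore API) ---------------------
# A minimal NumPy reference for the dilation formula documented above, limited
# to the VALID-padding, NCHW, stride=1, dilation=1 case. The name
# `dilation2d_ref` is a hypothetical helper used only to clarify the math.
import numpy as np


def dilation2d_ref(x, filt):
    """x: (N, C, H, W), filt: (C, h_ker, w_ker) -> (N, C, H - h_ker + 1, W - w_ker + 1)."""
    n, c, h, w = x.shape
    _, hk, wk = filt.shape
    y = np.empty((n, c, h - hk + 1, w - wk + 1), dtype=x.dtype)
    for i in range(y.shape[2]):
        for j in range(y.shape[3]):
            window = x[:, :, i:i + hk, j:j + wk]             # (N, C, hk, wk)
            # max over the kernel window of input + filter, per the formula above
            y[:, :, i, j] = (window + filt[None]).max(axis=(2, 3))
    return y


# e.g. dilation2d_ref(np.ones((10, 5, 32, 32)), np.ones((5, 3, 3))).shape == (10, 5, 30, 30),
# consistent with the shape in the docstring example.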
7491
7492class SoftShrink(Primitive):
7493    r"""
7494    Applies the SoftShrink function element-wise.
7495
7496    Refer to :func:`mindspore.ops.softshrink` for more details.
7497
7498    Args:
7499        lambd (float, optional): The :math:`\lambda` value, which must be no less than zero. Default: ``0.5`` .
7500
7501    Inputs:
7502        - **input_x** (Tensor) - The input of soft shrink with data type of float16 or float32.
7503
7504    Outputs:
7505        Tensor, has the same shape and data type as `input_x`.
7506
7507    Supported Platforms:
7508        ``Ascend`` ``GPU`` ``CPU``
7509
7510    Examples:
7511        >>> import mindspore
7512        >>> import numpy as np
7513        >>> from mindspore import Tensor, ops
7514        >>> input_x = Tensor(np.array([[ 0.5297,  0.7871,  1.1754], [ 0.7836,  0.6218, -1.1542]]), mindspore.float16)
7515        >>> softshrink = ops.SoftShrink()
7516        >>> output = softshrink(input_x)
7517        >>> print(output)
7518        [[ 0.02979  0.287    0.676  ]
7519         [ 0.2837   0.1216  -0.6543 ]]
7520    """
7521
7522    @prim_attr_register
7523    def __init__(self, lambd=0.5):
7524        """Initialize SoftShrink"""
7525        validator.check_value_type("lambd", lambd, [float], self.name)
7526        validator.check_number("lambd", lambd, 0, validator.GE, self.name)
7527
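# --- Illustrative sketch (not part of the MindSpore API) ---------------------
# A minimal NumPy version of the soft-shrink mapping referenced above:
# y = x - lambd if x > lambd, y = x + lambd if x < -lambd, otherwise 0.
# `soft_shrink_ref` is a hypothetical helper used only to clarify the formula.
import numpy as np


def soft_shrink_ref(x, lambd=0.5):
    return np.where(x > lambd, x - lambd, np.where(x < -lambd, x + lambd, 0.0))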
7528
7529class ApplyAdagradDA(Primitive):
7530    r"""
7531    Update `var` according to the proximal adagrad scheme.
7532    The Adagrad algorithm was proposed in
7533    `Adaptive Subgradient Methods for Online Learning and Stochastic Optimization
7534    <http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf>`_.
7535
7536    .. math::
7537        \begin{array}{ll} \\
7538            grad\_accum += grad \\
7539            grad\_squared\_accum += grad * grad \\
7540            tmp\_val=
7541                \begin{cases}
7542                     sign(grad\_accum) * max\left \{|grad\_accum|-l1*global\_step, 0\right \} & \text{ if } l1>0 \\
7543                     grad\_accum & \text{ otherwise } \\
7544                 \end{cases} \\
7545            x\_value = -1 * lr * tmp\_val \\
7546            y\_value = l2 * global\_step * lr + \sqrt{grad\_squared\_accum} \\
7547            var = \frac{ x\_value }{ y\_value }
7548        \end{array}
7549
7550    Inputs of `var`, `gradient_accumulator`, `gradient_squared_accumulator` and `grad`
7551    comply with the implicit type conversion rules to make the data types consistent.
7552    If they have different data types, the lower priority data type will be converted to
7553    the relatively highest priority data type.
7554
7555    Args:
7556        use_locking (bool): If ``True`` , updating of the `var` and `accum` tensors will be protected by a lock.
7557                            Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
7558
7559    Inputs:
7560        - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
7561          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
7562        - **gradient_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_accum`. Must have the same
7563          shape as `var`.
7564        - **gradient_squared_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_squared\_accum`.
7565          Must have the same shape as `var`.
7566        - **grad** (Tensor) - A tensor for gradient. Must have the same shape as `var`.
7567        - **lr** ([Number, Tensor]) - Scaling factor. Must be a scalar. With float32 or float16 data type.
7568        - **l1** ([Number, Tensor]) -  L1 regularization. Must be a scalar. With float32 or float16 data type.
7569        - **l2** ([Number, Tensor]) -  L2 regularization. Must be a scalar. With float32 or float16 data type.
7570        - **global_step** ([Number, Tensor]) - Training step number. Must be a scalar. With int32 or int64 data type.
7571
7572    Outputs:
7573        Tuple of 1 Tensor, the updated parameter.
7574
7575        - **var** (Tensor) - The same shape and data type as `var`.
7576
7577    Raises:
7578        TypeError: If `var`, `gradient_accumulator` or `gradient_squared_accumulator` is not a Parameter.
7579        TypeError: If `grad` is not a Tensor.
7580        TypeError: If `lr`, `l1`, `l2` or `global_step` is neither a Number nor a Tensor.
7581        TypeError: If use_locking is not a bool.
7582        TypeError: If dtype of `var`, `gradient_accumulator`, `gradient_squared_accumulator`, `grad`,
7583                   `lr`, `l1` or `l2` is neither float16 nor float32.
7584        TypeError: If dtype of `gradient_accumulator`, `gradient_squared_accumulator` or `grad` is not same as `var`.
7585        TypeError: If dtype of `global_step` is neither int32 nor int64.
7586        ValueError: If the shape size of `lr`, `l1`, `l2` and `global_step` is not 0.
7587        TypeError: If the data type of `var`, `gradient_accumulator`, `gradient_squared_accumulator` and `grad`
7588                      conversion of Parameter is not supported.
7589
7590    Supported Platforms:
7591        ``Ascend`` ``GPU`` ``CPU``
7592
7593    Examples:
7594        >>> import numpy as np
7595        >>> from mindspore import dtype as mstype
7596        >>> from mindspore import Tensor, nn, ops, Parameter
7597        >>> class ApplyAdagradDANet(nn.Cell):
7598        ...     def __init__(self, use_locking=False):
7599        ...         super(ApplyAdagradDANet, self).__init__()
7600        ...         self.apply_adagrad_d_a = ops.ApplyAdagradDA(use_locking)
7601        ...         self.var = Parameter(Tensor(np.array([[0.6, 0.4], [0.1, 0.5]]).astype(np.float32)), name="var")
7602        ...         self.gradient_accumulator = Parameter(Tensor(np.array([[0.1, 0.3],
7603        ...                                                                [0.1, 0.5]]).astype(np.float32)),
7604        ...                                               name="gradient_accumulator")
7605        ...         self.gradient_squared_accumulator = Parameter(Tensor(np.array([[0.2, 0.1],
7606        ...                                                                        [0.1, 0.2]]).astype(np.float32)),
7607        ...                                                       name="gradient_squared_accumulator")
7611        ...     def construct(self, grad, lr, l1, l2, global_step):
7612        ...         out = self.apply_adagrad_d_a(self.var, self.gradient_accumulator,
7613        ...                                      self.gradient_squared_accumulator, grad, lr, l1, l2, global_step)
7614        ...         return out
7615        ...
7616        >>> net = ApplyAdagradDANet()
7617        >>> grad = Tensor(np.array([[0.3, 0.4], [0.1, 0.2]]).astype(np.float32))
7618        >>> lr = Tensor(0.001, mstype.float32)
7619        >>> l1 = Tensor(0.001, mstype.float32)
7620        >>> l2 = Tensor(0.001, mstype.float32)
7621        >>> global_step = Tensor(2, mstype.int32)
7622        >>> output = net(grad, lr, l1, l2, global_step)
7623        >>> print(output)
7624        (Tensor(shape=[2, 2], dtype=Float32, value=
7625        [[-7.39064650e-04, -1.36888528e-03],
7626         [-5.96988888e-04, -1.42478070e-03]]))
7627    """
7628
7629    __mindspore_signature__ = (
7630        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7631        sig.make_sig('gradient_accumulator', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7632        sig.make_sig('gradient_squared_accumulator', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7633        sig.make_sig('grad', dtype=sig.sig_dtype.T),
7634        sig.make_sig('lr', dtype=sig.sig_dtype.T1),
7635        sig.make_sig('l1', dtype=sig.sig_dtype.T2),
7636        sig.make_sig('l2', dtype=sig.sig_dtype.T3),
7637        sig.make_sig('global_step', dtype=sig.sig_dtype.T4)
7638    )
7639
7640    @prim_attr_register
7641    def __init__(self, use_locking=False):
7642        """Initialize ApplyAdagradDA"""
7643        validator.check_value_type("use_locking", use_locking, [bool], self.name)
7644        self.add_prim_attr('side_effect_mem', True)
7645
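# --- Illustrative sketch (not part of the MindSpore API) ---------------------
# A minimal NumPy transcription of the AdagradDA update written in the formula
# above. `adagrad_da_ref` is hypothetical; the primitive itself updates its
# Parameter inputs in place and returns the new `var`.
import numpy as np


def adagrad_da_ref(var, grad_accum, grad_squared_accum, grad, lr, l1, l2, global_step):
    grad_accum = grad_accum + grad
    grad_squared_accum = grad_squared_accum + grad * grad
    if l1 > 0:
        tmp_val = np.sign(grad_accum) * np.maximum(np.abs(grad_accum) - l1 * global_step, 0)
    else:
        tmp_val = grad_accum
    x_value = -1 * lr * tmp_val
    y_value = l2 * global_step * lr + np.sqrt(grad_squared_accum)
    var = x_value / y_value
    return var, grad_accum, grad_squared_accum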
7646
7647class SparseApplyRMSProp(Primitive):
7648    r"""
7649    Update relevant entries according to the rmsprop algorithm.
7650
7651    .. math::
7652        \begin{array}{ll} \\
7653            ms = rho * ms_{t-1} + (1 - rho) * grad * grad \\
7654            mom = momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) \\
7655            var = var - mom
7656        \end{array}
7657
7658    Inputs of `var`, `ms`, `mom` and `grad` comply with the implicit type conversion rules
7659    to make the data types consistent.
7660    If they have different data types, the lower priority data type will be converted to
7661    the relatively highest priority data type.
7662
7663    Args:
7664        rho (float): Decay rate. The value should be between 0 and 1, otherwise the behavior is undefined.
7665        momentum (float): Momentum. The value should be greater or equal to 0, otherwise the behavior is undefined.
7666        epsilon (float): A small value added for numerical stability. The value should be greater than 0,
7667                         otherwise the behavior is undefined.
7668        use_locking (bool): If ``True`` , updating of the var, ms, and mom tensors are protected by a lock;
7669                            otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
7670
7671    Inputs:
7672        - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
7673          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
7674        - **ms** (Parameter) - The dict of mutable tensor ms. Must have the same shape and dtype as `var`.
7675        - **mom** (Parameter) - The dict of mutable tensor mom. Must have the same shape and dtype as `var`.
7676        - **lr** ([Number, Tensor]) - Learning rate. Must be a scalar. With float16 or float32 data type.
7677        - **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`.
7678        - **indices** (Tensor) - A tensor of indices into the first dimension of `var`, `ms` and `mom`.
7679          If there are duplicates in `indices`, the behavior is undefined. Must be one of the
7680          following types: int32, int64, and indices.shape[0] must be equal to var.shape[0].
7681
7682    Outputs:
7683        Tuple of 3 Tensors, the updated parameters.
7684
7685        - **var** (Tensor) -  The same shape and data type as `var`.
7686        - **ms** (Tensor) - The same shape and data type as `ms`.
7687        - **mom** (Tensor) - The same shape and data type as `mom`.
7688
7689    Raises:
7690        TypeError: If `var`, `ms` or `mom` is not a Parameter.
7691        TypeError: If `grad` or `indices` is not a Tensor.
7692        TypeError: If dtype of `var`, `ms`, `mom`, `lr`, `grad` is neither float16 nor float32.
7693        TypeError: If dtype of `indices` is neither int32 nor int64.
7694        TypeError: If `lr` is neither a Number nor a Tensor.
7695        TypeError: If `use_locking` is not a bool.
7696        TypeError: If `epsilon`, `rho` or `momentum` is not a float.
7697        ValueError: If shape of `ms`, `mom`, `grad` is not same as `var`.
7698        ValueError: If the shape size of `lr` is not 0.
7699        ValueError: If shape of `indices` is not same as shape of first dimension of `var`.
7700        ValueError: If `epsilon` is less than or equal to 0.
7701        ValueError: If `momentum` is less than 0.
7702        ValueError: If `rho` is less than 0 or greater than 1.
7703        ValueError: If dimension of `var` is less than 1.
7704        RuntimeError: If the data type of `var`, `ms`, `mom` and `grad` conversion of Parameter is not supported.
7705
7706    Supported Platforms:
7707        ``Ascend``  ``GPU`` ``CPU``
7708
7709    Examples:
7710        >>> class SparseApplyRMSPropNet(nn.Cell):
7711        ...     def __init__(self, rho, momentum, epsilon, use_locking=False):
7712        ...         super(SparseApplyRMSPropNet, self).__init__()
7713        ...         self.sparse_apply_r_m_s_prop = P.SparseApplyRMSProp(rho, momentum, epsilon, use_locking)
7714        ...         self.var = Parameter(Tensor(np.array([[0.6, 0.3], [0.1, 0.5]]).astype(np.float32)), name="var")
7715        ...         self.ms = Parameter(Tensor(np.array([[0.2, 0.4], [0.1, 0.3]]).astype(np.float32)), name="ms")
7716        ...         self.mom = Parameter(Tensor(np.array([[0.3, 0.1], [0.3, 0.6]]).astype(np.float32)), name="mom")
7717        ...     def construct(self, lr, grad, indices):
7718        ...         out = self.sparse_apply_r_m_s_prop(self.var, self.ms, self.mom, lr, grad, indices)
7719        ...         return out
7720        ...
7721        >>> rho = 0.2
7722        >>> momentum = 0.01
7723        >>> epsilon = 1e-6
7724        >>> net = SparseApplyRMSPropNet(rho, momentum, epsilon)
7725        >>> lr = 0.01
7726        >>> grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
7727        >>> indices = Tensor(np.array([0, 1], dtype=np.int32))
7728        >>> out = net(lr, grad, indices)
7729        >>> print(out)
7730        (Tensor(shape=[2, 2], dtype=Float32, value=
7731        [[ 5.88035822e-01,  2.88811117e-01],
7732         [ 9.10239667e-02,  4.83422279e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
7733        [[ 1.12000003e-01,  4.72000003e-01],
7734         [ 2.80000009e-02,  5.72000027e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
7735        [[ 1.19641740e-02,  1.11888833e-02],
7736         [ 8.97603668e-03,  1.65777095e-02]]))
7737    """
7738
7739    __mindspore_signature__ = (
7740        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7741        sig.make_sig('ms', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7742        sig.make_sig('mom', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7743        sig.make_sig('lr', dtype=sig.sig_dtype.T1),
7744        sig.make_sig('grad', dtype=sig.sig_dtype.T),
7745        sig.make_sig('indices', dtype=sig.sig_dtype.T2)
7746    )
7747
7748    @prim_attr_register
7749    def __init__(self, rho, momentum, epsilon, use_locking=False):
7750        """Initialize SparseApplyRMSProp"""
7751        validator.check_value_type("rho", rho, [float], self.name)
7752        validator.check_value_type("momentum", momentum, [float], self.name)
7753        validator.check_value_type("epsilon", epsilon, [float], self.name)
7754        validator.check_value_type("use_locking", use_locking, [bool], self.name)
7755        self.epsilon = validator.check_number("epsilon", epsilon, 0.0, validator.GT, self.name)
7756        self.momentum = validator.check_number("momentum", momentum, 0.0, validator.GE, self.name)
7757        self.rho = validator.check_float_range(rho, 0.0, 1.0, validator.INC_BOTH, "rho", self.name)
7758
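# --- Illustrative sketch (not part of the MindSpore API) ---------------------
# A minimal NumPy version of the sparse RMSProp rule above: only the rows of
# `var`/`ms`/`mom` selected by `indices` are updated. `sparse_rmsprop_ref` is a
# hypothetical helper used to clarify the formula, not the operator kernel.
import numpy as np


def sparse_rmsprop_ref(var, ms, mom, lr, grad, indices, rho, momentum, epsilon):
    for k, row in enumerate(indices):
        ms[row] = rho * ms[row] + (1 - rho) * grad[k] * grad[k]
        mom[row] = momentum * mom[row] + lr * grad[k] / np.sqrt(ms[row] + epsilon)
        var[row] = var[row] - mom[row]
    return var, ms, mom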
7759
7760class SparseApplyCenteredRMSProp(Primitive):
7761    r"""
7762    Update `var` according to the centered RMSProp algorithm.
7763
7764    .. math::
7765        \begin{array}{ll} \\
7766            mg = rho * mg_{t-1} + (1 - rho) * grad \\
7767            ms = rho * ms_{t-1} + (1 - rho) * grad * grad \\
7768            mom = momentum * mom_{t-1} + lr * grad / \sqrt{ms - mg * mg + epsilon} \\
7769            var = var - mom
7770        \end{array}
7777
7778    .. warning::
7779        In dense implementation of this algorithm, `mean_gradient`, `mean_square`, and `moment` will update
7780        even if the `grad` is zero. But in this sparse implementation, `mean_gradient`, `mean_square`, and `moment`
7781        will not update in iterations during which the `grad` is zero.
7782
7783    Args:
7784        use_locking (bool): If ``True`` , updating of the `var`, `mg`, `ms`, and `mom` tensors will be protected by a
7785                            lock. Otherwise the behavior is undefined, but may exhibit less contention.
7786                            Default: ``False`` .
7787
7788    Inputs:
7789        - **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32, int64,
7790          uint8, uint16, uint32, uint64, float16, float32 or float64.
7791          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
7792        - **mg** (Parameter) - Mean gradients. Must have the same shape and dtype as `var`.
7793        - **ms** (Parameter) - Mean square gradients. Must have the same shape and dtype as `var`.
7794        - **mom** (Parameter) - Delta of `var`. Must have the same shape and dtype as `var`.
7795        - **lr** (Union[Number, Tensor]) - Learning rate. Must be a float number or a scalar tensor.
7796          Must have the same type as `var`.
7797        - **rho** (Union[Number, Tensor]) - Decay rate. Must be a float number or a scalar tensor.
7798          Must have the same type as `var`.
7799        - **momentum** (Union[Number, Tensor]) - Momentum. Must be a float number or a scalar tensor.
7800          Must have the same type as `var`.
7801        - **epsilon** (Union[Number, Tensor]) - Ridge term. Must be a float number or a scalar tensor.
7802          Must have the same type as `var`.
7803        - **grad** (Tensor) - A tensor of the same type as `var` and grad.shape[1:] = var.shape[1:] if rank(var) > 1.
7804        - **indices** (Tensor) - Gradient indices. Must be one of the following types: int32, int64,
7805          and indices.shape[0] must be equal to grad.shape[0].
7806
7807    Outputs:
7808        - **var** (Tensor) - Tensor, has the same shape and data type as `var`.
7809
7810    Raises:
7811        TypeError: If `use_locking` is not a bool.
7812        TypeError: If `var`, `mg`, `ms`, `mom`, `grad`, `indices` is not a Tensor.
7813        TypeError: If `lr`, `rho`, `momentum` or `epsilon` is neither a Number nor a Tensor.
7814        TypeError: If dtype of `var`, `mg`, `ms`, `mom`, `lr`, `rho`, `momentum`, `epsilon` or `grad`
7815                   is neither float16 nor float32.
7816        TypeError: If dtype of `mg`, `ms`, `mom`, `grad` is not same as `var`.
7817        TypeError: If dtype of `indices` is not int32 or int64.
7818        ValueError: If shape of `mg`, `ms` or `mom` is not same as `var`.
7819        ValueError: If the rank of `indices` is not equal to 1.
7820        ValueError: If the dimension of `grad` is less than 1.
7821        ValueError: If shape of `indices` is not same as shape of first dimension of `grad`.
7822        ValueError: If shape of `grad` is not same as shape of `var` except first dimension.
7823
7824    Supported Platforms:
7825        ``Ascend`` ``GPU`` ``CPU``
7826
7827    Examples:
7828        >>> var = Tensor(np.array([[0.6, 0.4], [0.1, 0.5]]).astype(np.float32))
7829        >>> mg = Tensor(np.array([[0.1, 0.3], [0.1, 0.5]]).astype(np.float32))
7830        >>> ms = Tensor(np.array([[0.2, 0.1], [0.1, 0.2]]).astype(np.float32))
7831        >>> mom = Tensor(np.array([[0.2, 0.1], [0.1, 0.2]]).astype(np.float32))
7832        >>> lr = Tensor(0.001, mstype.float32)
7833        >>> rho = Tensor(1e-10, mstype.float32)
7834        >>> momentum = Tensor(0.001, mstype.float32)
7835        >>> epsilon = Tensor(0.01, mstype.float32)
7836        >>> grad = Tensor(np.array([[0.3, 0.4], [0.1, 0.2]]).astype(np.float32))
7837        >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
7838        >>> sparse_apply_centered_rms_prop = ops.SparseApplyCenteredRMSProp()
7839        >>> output = sparse_apply_centered_rms_prop(var, mg, ms, mom, lr, rho, momentum, epsilon, grad, indices)
7840        >>> print(output)
7841        [[0.5968 0.3959]
7842         [0.0989 0.4978]]
7843    """
7844
7845    __mindspore_signature__ = (
7846        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7847        sig.make_sig('mg', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7848        sig.make_sig('ms', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7849        sig.make_sig('mom', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7850        sig.make_sig('lr', dtype=sig.sig_dtype.T),
7851        sig.make_sig('rho', dtype=sig.sig_dtype.T),
7852        sig.make_sig('momentum', dtype=sig.sig_dtype.T),
7853        sig.make_sig('epsilon', dtype=sig.sig_dtype.T),
7854        sig.make_sig('grad', dtype=sig.sig_dtype.T),
7855        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
7856    )
7857
7858    @prim_attr_register
7859    def __init__(self, use_locking=False):
7860        """Initialize SparseApplyCenteredRMSProp."""
7861        self.init_prim_io_names(inputs=['var', 'mg', 'ms', 'mom', 'lr', 'rho', 'momentum',
7862                                        'epsilon', 'grad', 'indices'],
7863                                outputs=['var'])
7864        validator.check_value_type("use_locking", use_locking, [bool], self.name)
7865
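# --- Illustrative sketch (not part of the MindSpore API) ---------------------
# A minimal NumPy version of the centered, sparse RMSProp rule described above,
# updating only the rows selected by `indices`. `sparse_centered_rmsprop_ref`
# is a hypothetical helper used to clarify the formula.
import numpy as np


def sparse_centered_rmsprop_ref(var, mg, ms, mom, lr, rho, momentum, epsilon, grad, indices):
    for k, row in enumerate(indices):
        mg[row] = rho * mg[row] + (1 - rho) * grad[k]
        ms[row] = rho * ms[row] + (1 - rho) * grad[k] * grad[k]
        denom = np.sqrt(ms[row] - mg[row] * mg[row] + epsilon)
        mom[row] = momentum * mom[row] + lr * grad[k] / denom
        var[row] = var[row] - mom[row]
    return var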
7866
7867class ApplyKerasMomentum(Primitive):
7868    r"""
7869    Update `var` according to the momentum scheme.
7870
7871    .. math::
7872        \begin{array}{ll} \\
7873            accum = accum * momentum - grad * lr \\
7874            var =
7875            \begin{cases}
7876                var + accum * momentum - grad * lr, &\text{if use_nesterov} \\
7877                var + accum, &\text{else}
7878            \end{cases}
7879        \end{array}
7880
7881    Refer to the paper `On the importance of initialization and momentum in deep
7882    learning <https://dl.acm.org/doi/10.5555/3042817.3043064>`_  for more details.
7883
7884    Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
7885    to make the data types consistent.
7886    If they have different data types, the lower priority data type will be converted to
7887    the relatively highest priority data type.
7888    RuntimeError exception will be thrown when the data type conversion of Parameter is required.
7889
7890    Args:
7891        use_locking (bool): If ``True`` , updating of the `var` and `accum` tensors will be protected by a lock;
7892                            Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
7893        use_nesterov (bool): If ``True`` , the tensor passed to compute grad will be var + momentum * accum,
7894                            so in the end, the var you get is actually var + momentum * accum. Default: ``False`` .
7895
7896    Inputs:
7897        - **var** (Parameter) - Variable to be updated. With float16 or float32 data type.
7898        - **accum** (Parameter) - Must have the same shape and type as `var`. With float16 or float32 data type.
7899        - **lr** (Union[Number, Tensor]) - Scaling factor. Must be a scalar. With float16 or float32 data type.
7900        - **grad** (Tensor) - The gradient. Must have the same shape and type as `var`.
7901          With float16 or float32 data type.
7902        - **momentum** (Union[Number, Tensor]) - Momentum. Must be a scalar. With float16 or float32 data type.
7903
7904    Outputs:
7905        Tuple of 2 Tensors, the updated parameters.
7906
7907        - **var** (Tensor) - The same shape and data type as `var`.
7908        - **accum** (Tensor) - The same shape and data type as `accum`.
7909
7910    Raises:
7911        TypeError: If `use_locking` or `use_nesterov` is not a bool.
7912        TypeError: If `var` or `accum` is not a Parameter.
7913        TypeError: If `lr` is neither a Number nor a Tensor.
7914        TypeError: If `grad` is not a Tensor.
7915        TypeError: If `momentum` is neither a Number nor a Tensor.
7916        TypeError: If dtype of `var`, `accum`, `lr`, `grad`, `momentum` is neither float16 nor float32.
7917        ValueError: If `accum` or `grad` doesn't have the same shape as `var`.
7918        ValueError: If the shape size of `lr`, `momentum` is not 0.
7919
7920    Supported Platforms:
7921        ``Ascend``
7922
7923    Examples:
7924        >>> class ApplyKerasMomentumNet(nn.Cell):
7925        ...     def __init__(self, use_locking=False, use_nesterov=False):
7926        ...         super(ApplyKerasMomentumNet, self).__init__()
7927        ...         self.apply_keras_momentum = P.ApplyKerasMomentum(use_locking, use_nesterov)
7928        ...         self.var = Parameter(Tensor(np.array([[0.2, 0.3], [0.1, 0.4]]).astype(np.float32)), name="var")
7929        ...         self.accum = Parameter(Tensor(np.array([[0.2, 0.3], [0.1, 0.4]]).astype(np.float32)), name="accum")
7930        ...     def construct(self, lr, grad, momentum):
7931        ...         out = self.apply_keras_momentum(self.var, self.accum, lr, grad, momentum)
7932        ...         return out
7933        ...
7934        >>> net = ApplyKerasMomentumNet()
7935        >>> lr = Tensor(0.001, mstype.float32)
7936        >>> grad = Tensor(np.array([[0.3, 0.2], [0.4, 0.1]]).astype(np.float32))
7937        >>> momentum = Tensor(0.99, mstype.float32)
7938        >>> output = net(lr, grad, momentum)
7939        >>> print(output)
7940        (Tensor(shape=[2, 2], dtype=Float32, value=
7941        [[ 3.97700012e-01,  5.96800029e-01],
7942        [ 1.98599994e-01,  7.95899987e-01]]), Tensor(shape=[2, 2], dtype=Float32, value=
7943        [[ 1.97699994e-01,  2.96800017e-01],
7944        [ 9.86000001e-02,  3.95900011e-01]]))
7945    """
7946
7947    __mindspore_signature__ = (
7948        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7949        sig.make_sig('accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
7950        sig.make_sig('lr', dtype=sig.sig_dtype.T1),
7951        sig.make_sig('grad', dtype=sig.sig_dtype.T),
7952        sig.make_sig('momentum', dtype=sig.sig_dtype.T2)
7953    )
7954
7955    @prim_attr_register
7956    def __init__(self, use_locking=False, use_nesterov=False):
7957        """Initialize ApplyKerasMomentum"""
7958        validator.check_value_type("use_locking", use_locking, [bool], self.name)
7959        validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name)
7960
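# --- Illustrative sketch (not part of the MindSpore API) ---------------------
# A minimal transcription of the Keras-style momentum update in the formula
# above. `keras_momentum_ref` is hypothetical and works on NumPy arrays or
# plain floats; it only clarifies the math.
def keras_momentum_ref(var, accum, lr, grad, momentum, use_nesterov=False):
    accum = accum * momentum - grad * lr
    if use_nesterov:
        var = var + accum * momentum - grad * lr
    else:
        var = var + accum
    return var, accum


# e.g. keras_momentum_ref(0.2, 0.2, 0.001, 0.3, 0.99) ~= (0.3977, 0.1977),
# consistent with the first element of the docstring example output.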
7961
7962class MultilabelMarginLoss(Primitive):
7963    r"""
7964    Creates a loss criterion that minimizes the hinge loss for multi-class,
7965    multi-label classification tasks.
7966    It takes a 2D mini-batch Tensor :math:`x` as input and a 2D
7967    Tensor :math:`y` containing target class indices as the target.
7968
7969    Refer to :func:`mindspore.ops.multilabel_margin_loss` for more details.
7970
7971    Args:
7972        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
7973            ``'sum'`` . Default: ``'mean'`` .
7974
7975            - ``'none'``: no reduction will be applied.
7976            - ``'mean'``: compute and return the mean of elements in the output.
7977            - ``'sum'``: the output elements will be summed.
7978
7979    Inputs:
7980        - **x** (Tensor) - Predict data. Tensor of shape :math:`(C)` or :math:`(N, C)`, where :math:`N`
7981          is the batch size and :math:`C` is the number of classes. Data type must be float16 or float32.
7982        - **target** (Tensor) - Ground truth data, with the same shape as `x`. The data type must be int32, and
7983          label targets are padded with -1.
7984
7985    Outputs:
7986        - **y** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is ``"none"``, its shape
7987          is :math:`(N)`. Otherwise, a scalar value will be returned.
7988        - **is_target** (Tensor) - Output tensor for backward input, with the same shape as `target`,
7989          data type must be int32.
7990
7991    Supported Platforms:
7992        ``Ascend`` ``GPU``
7993
7994    Examples:
7995       >>> import mindspore
7996       >>> import numpy as np
7997       >>> from mindspore import Tensor, ops
7998       >>> loss = ops.MultilabelMarginLoss()
7999       >>> x = Tensor(np.array([[0.1, 0.2, 0.4, 0.8], [0.2, 0.3, 0.5, 0.7]]), mindspore.float32)
8000       >>> target = Tensor(np.array([[1, 2, 0, 3], [2, 3, -1, 1]]), mindspore.int32)
8001       >>> output = loss(x, target)
8002       >>> print(output)
8003       (Tensor(shape=[], dtype=Float32, value= 0.325), Tensor(shape=[2, 4], dtype=Int32, value=
8004       [[1, 1, 1, 1], [0, 0, 1, 1]]))
8005    """
8006
8007    @prim_attr_register
8008    def __init__(self, reduction='mean'):
8009        """Initialize MultilabelMarginLoss"""
8010        self.init_prim_io_names(inputs=['x', 'target'], outputs=['y', 'is_target'])
8011        self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
8012
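# --- Illustrative sketch (not part of the MindSpore API) ---------------------
# A minimal NumPy version of the multilabel margin loss described above, with
# 'mean' reduction. The target classes of each sample are the leading entries
# of its `target` row before the first -1. `multilabel_margin_loss_ref` is a
# hypothetical helper used to clarify the loss, not the operator kernel.
import numpy as np


def multilabel_margin_loss_ref(x, target):
    losses = []
    for xi, ti in zip(x, target):
        stop = np.where(ti == -1)[0]
        labels = ti[:stop[0]] if stop.size else ti
        others = [i for i in range(xi.size) if i not in set(labels.tolist())]
        # hinge terms between every target class and every non-target class
        loss = sum(max(0.0, 1.0 - (xi[j] - xi[i])) for j in labels for i in others)
        losses.append(loss / xi.size)
    return float(np.mean(losses))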
8013
8014class ApplyAdamWithAmsgrad(Primitive):
8015    r"""
8016    Update var according to the Adam algorithm.
8017
8018    .. math::
8019        \begin{array}{ll} \\
8020            lr_t:=learning\_rate*\sqrt{1-\beta_2^t}/(1-\beta_1^t) \\
8021            m_t:=\beta_1*m_{t-1}+(1-\beta_1)*g \\
8022            v_t:=\beta_2*v_{t-1}+(1-\beta_2)*g*g \\
8023            \hat v_t:=max(\hat v_{t-1}, v_t) \\
8024            var:=var-lr_t*m_t/(\sqrt{\hat v_t}+\epsilon) \\
8025        \end{array}
8026
8027    Inputs of `var`, `m`, `v`, `vhat` and `grad` comply with the implicit type conversion rules
8028    to make the data types consistent.
8029    If they have different data types, the lower priority data type will be converted to
8030    the relatively highest priority data type.
8031
8032    Inputs of `beta1_power`, `beta1`, `beta2` and `epsilon` comply with the implicit type conversion rules
8033    to make the data types consistent.
8034    If they have different data types, the lower priority data type will be converted to
8035    the relatively highest priority data type.
8036
8037    However, note that there is no implicit type conversion rule between `var` and `beta1_power`;
8038    the two sets of rules are independent of each other.
8039
8040    Args:
8041        beta1 (float): Exponential decay rate of the first moment. Must be a scalar.
8042        beta2 (float): Exponential decay rate of the second moment. Must be a scalar.
8043        epsilon (float): Ridge term. Must be a scalar.
8044        use_locking (bool): If ``True`` , updating of the `var`, `m`, and `v` tensors will
8045          be protected by a lock; Otherwise the behavior is undefined, but may exhibit less contention.
8046          Default: ``False`` .
8047
8048    Inputs:
8049        - **var** (Parameter) - Variable to be updated. The data type can be float16 or float32.
8050        - **m** (Parameter) - The 1st moment vector in the updating formula,
8051          the shape and data type value should be the same as `var`.
8052        - **v** (Parameter) - the 2nd moment vector in the updating formula,
8053          the shape and data type value should be the same as `var`.
8054        - **vhat** (Parameter) - :math:`\hat v_t` in the updating formula,
8055          the shape and data type value should be the same as `var`.
8056        - **beta1_power** (Union[float, Tensor]) - :math:`\beta_1^{t}` in the updating formula,
8057          a scalar tensor with float16 or float32 data type.
8058        - **beta2_power** (Union[float, Tensor]) - :math:`\beta_2^{t}` in the updating formula,
8059          a scalar tensor with float16 or float32 data type.
8060        - **lr** (Union[float, Tensor]) - Scaling factor, a scalar tensor with float16 or float32 data type.
8061        - **grad** (Tensor) - The gradient, has the same shape and data type as `var`.
8062
8063    Outputs:
8064        Tuple of 4 Tensors, the updated parameters.
8065
8066        - **var** (Tensor) - The same shape and data type as `var`.
8067        - **m** (Tensor) - The same shape and data type as `m`.
8068        - **v** (Tensor) - The same shape and data type as `v`.
8069        - **vhat** (Tensor) - The same shape and data type as `vhat`.
8070
8071    Raises:
8072        TypeError: If `var`, `m`, `v`, `vhat` is not a Parameter.
8073        TypeError: If `beta1_power`, `beta2_power`, `lr` is neither a Number nor a Tensor.
8074        TypeError: If `grad` is not a Tensor.
8075        TypeError: If dtype of `var`, `m`, `v`, `vhat`, `beta1_power`, `beta2_power`,
8076          `lr` or `grad` is not float32 or float16.
8077        ValueError: If `m`, `v`, `vhat` or `grad` doesn't have the same shape as `var`.
8078        ValueError: If the shape size of `beta1_power`, `beta2_power` or `lr` is not 0.
8079
8080    Supported Platforms:
8081        ``Ascend`` ``GPU`` ``CPU``
8082
8083    Examples:
8084        >>> class ApplyAdamWithAmsgradNet(nn.Cell):
8085        ...     def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, use_locking=False):
8086        ...         super(ApplyAdamWithAmsgradNet, self).__init__()
8087        ...         self.apply_adam_with_amsgrad = P.ApplyAdamWithAmsgrad(beta1, beta2, epsilon, use_locking)
8088        ...         self.var = Parameter(Tensor(np.array([[0.2, 0.2], [0.2, 0.2]]).astype(np.float32)), name="var")
8089        ...         self.m = Parameter(Tensor(np.array([[0.1, 0.2], [0.4, 0.3]]).astype(np.float32)), name="m")
8090        ...         self.v = Parameter(Tensor(np.array([[0.2, 0.1], [0.3, 0.4]]).astype(np.float32)), name="v")
8091        ...         self.vhat = Parameter(Tensor(np.array([[0.1, 0.2], [0.6, 0.2]]).astype(np.float32)), name="vhat")
8092        ...     def construct(self, beta1_power, beta2_power, lr, grad):
8093        ...         out = self.apply_adam_with_amsgrad(self.var, self.m, self.v, self.vhat,
8094        ...                                            beta1_power, beta2_power, lr, grad)
8095        ...         return out
8096        >>> net = ApplyAdamWithAmsgradNet()
8097        >>> grad = Tensor(np.array([[0.4, 0.2], [0.2, 0.3]]).astype(np.float32))
8098        >>> output = net(Tensor(0.9, mstype.float32), Tensor(0.999, mstype.float32), Tensor(0.01, mstype.float32), grad)
8099        >>> print(net.var.asnumpy())
8100        [[0.19908068 0.1985858 ]
8101        [0.19844866 0.19849943]]
8102    """
8103
8104    __mindspore_signature__ = (
8105        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8106        sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8107        sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8108        sig.make_sig('vhat', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8109        sig.make_sig('beta1_power', dtype=sig.sig_dtype.T1),
8110        sig.make_sig('beta2_power', dtype=sig.sig_dtype.T2),
8111        sig.make_sig('lr', dtype=sig.sig_dtype.T3),
8112        sig.make_sig('grad', dtype=sig.sig_dtype.T)
8113    )
8114
8115    @prim_attr_register
8116    def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, use_locking=False):
8117        """Initialize ApplyAdamWithAmsgrad"""
8118        validator.check_value_type("beta1", beta1, [float], self.name)
8119        validator.check_value_type("beta2", beta2, [float], self.name)
8120        validator.check_value_type("epsilon", epsilon, [float], self.name)
8121        validator.check_value_type("use_locking", use_locking, [bool], self.name)
8122        self.add_prim_attr("side_effect_mem", True)
8123
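# --- Illustrative sketch (not part of the MindSpore API) ---------------------
# A minimal NumPy transcription of the AMSGrad update written in the formula
# above. `adam_amsgrad_ref` is hypothetical; the primitive updates its
# Parameter inputs in place.
import numpy as np


def adam_amsgrad_ref(var, m, v, vhat, beta1_power, beta2_power, lr, grad,
                     beta1=0.9, beta2=0.999, epsilon=1e-8):
    lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad * grad
    vhat = np.maximum(vhat, v)
    var = var - lr_t * m / (np.sqrt(vhat) + epsilon)
    return var, m, v, vhat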
8124
8125class ApplyAdamWithAmsgradV2(Primitive):
8126    r"""
8127    Update var according to the Adam algorithm.
8128
8129    .. math::
8130        \begin{array}{ll} \\
8131            lr_t:=learning\_rate*\sqrt{1-\beta_2^t}/(1-\beta_1^t) \\
8132            m_t:=\beta_1*m_{t-1}+(1-\beta_1)*g \\
8133            v_t:=\beta_2*v_{t-1}+(1-\beta_2)*g*g \\
8134            \hat v_t:=\max(\hat v_{t-1}, v_t) \\
8135            var:=var-lr_t*m_t/(\sqrt{\hat v_t}+\epsilon) \\
8136        \end{array}
8137
8138    :math:`t` represents updating step while :math:`m` represents the 1st moment vector,
8139    :math:`v` represents the 2nd moment vector,  :math:`\hat v_t` represents `vhat`,
8140    :math:`lr` represents learning rate,
8141    :math:`g` represents `grad`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
8142    :math:`\beta_1^{t}` represents `beta1_power`, :math:`\beta_2^{t}` represents `beta2_power`,
8143    :math:`var` represents the variable to be updated,
8144    :math:`\epsilon` represents `epsilon`.
8145
8146    All of the inputs are consistent with implicit type conversion rules,
8147    which ensure that the data types are the same. If they have different data types, the lower precision data type
8148    will be converted to the data type with relatively higher precision.
8149
8150    Args:
8151        use_locking (bool): If ``True`` , updating of the `var`, `m`, and `v` tensors will
8152            be protected by a lock; Otherwise the behavior is undefined, but may exhibit less contention.
8153            Default: ``False`` .
8154
8155    Inputs:
8156        - **var** (Parameter) - Variable to be updated. The data type can be float16, float32 or float64.
8157        - **m** (Parameter) - The 1st moment vector in the updating formula,
8158          the shape should be the same as `var`.
8159        - **v** (Parameter) - The 2nd moment vector in the updating formula,
8160          the shape should be the same as `var`.
8161        - **vhat** (Parameter) - :math:`\hat v_t` in the updating formula,
8162          the shape and data type value should be the same as `var`.
8163        - **beta1_power** (Union[float, Tensor]) - :math:`\beta_1^{t}` in the updating formula,
8164          with float16, float32 or float64 data type.
8165        - **beta2_power** (Union[float, Tensor]) - :math:`\beta_2^{t}` in the updating formula,
8166          with float16, float32 or float64 data type.
8167        - **lr** (Union[float, Tensor]) - Learning rate, with float16, float32 or float64 data type.
8168        - **beta1** (Union[float, Tensor]) - Exponential decay rate of the first moment.
8169          The data type can be float16, float32 or float64.
8170        - **beta2** (Union[float, Tensor]) - Exponential decay rate of the second moment.
8171          The data type can be float16, float32 or float64.
8172        - **epsilon** (Union[float, Tensor]) - A value added to the denominator to ensure numerical stability.
8173          The data type can be float16, float32 or float64.
8174        - **grad** (Tensor) - The gradient, has the same shape as `var`.
8175
8176    Outputs:
8177        Tuple of 4 Tensors, the updated parameters.
8178
8179        - **var** (Tensor) - The same shape and data type as `var`.
8180        - **m** (Tensor) - The same shape and data type as `m`.
8181        - **v** (Tensor) - The same shape and data type as `v`.
8182        - **vhat** (Tensor) - The same shape and data type as `vhat`.
8183
8184    Raises:
8185        TypeError: If `var`, `m`, `v`, `vhat` is not a Parameter.
8186        TypeError: If dtype of `var`, `m`, `v`, `vhat`, `beta1_power`, `beta2_power`,
8187            `lr`, `beta1` , `beta2` , `epsilon` or `grad` is not float64, float32 or float16.
8188        RuntimeError: If the data type of `var`, `m`, `v` , `vhat` and `grad` conversion of Parameter is not supported.
8189
8190    Supported Platforms:
8191        ``Ascend`` ``GPU`` ``CPU``
8192
8193    Examples:
8194        >>> from mindspore import ops
8195        >>> import mindspore.nn as nn
8196        >>> from mindspore import Tensor, Parameter
8197        >>> import numpy as np
8198        >>> class ApplyAdamWithAmsgradNet(nn.Cell):
8199        ...     def __init__(self, use_locking=False):
8200        ...         super(ApplyAdamWithAmsgradNet, self).__init__()
8201        ...         self.apply_adam_with_amsgrad = ops.ApplyAdamWithAmsgradV2(use_locking)
8202        ...         self.var = Parameter(Tensor(np.array([[0.2, 0.2], [0.2, 0.2]]).astype(np.float32)), name="var")
8203        ...         self.m = Parameter(Tensor(np.array([[0.1, 0.2], [0.4, 0.3]]).astype(np.float32)), name="m")
8204        ...         self.v = Parameter(Tensor(np.array([[0.2, 0.1], [0.3, 0.4]]).astype(np.float32)), name="v")
8205        ...         self.vhat = Parameter(Tensor(np.array([[0.1, 0.2], [0.6, 0.2]]).astype(np.float32)), name="vhat")
8206        ...         self.beta1 = 0.8
8207        ...         self.beta2 = 0.999
8208        ...         self.epsilon = 1e-8
8209        ...         self.beta1_power = 0.9
8210        ...         self.beta2_power = 0.999
8211        ...         self.lr = 0.01
8212        ...
8213        ...     def construct(self, grad):
8214        ...         out = self.apply_adam_with_amsgrad(self.var, self.m, self.v, self.vhat,
8215        ...                                            self.beta1_power, self.beta2_power, self.lr,
8216        ...                                            self.beta1, self.beta2, self.epsilon, grad)
8217        ...         return out
8218        >>> net = ApplyAdamWithAmsgradNet()
8219        >>> grad = Tensor(np.array([[0.4, 0.2], [0.2, 0.3]]).astype(np.float32))
8220        >>> output = net(grad)
8221        >>> print(net.var.asnumpy())
8222        [[0.19886853 0.1985858 ]
8223        [0.19853032 0.19849943]]
8224    """
8225
8226    __mindspore_signature__ = (
8227        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8228        sig.make_sig('m', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8229        sig.make_sig('v', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8230        sig.make_sig('vhat', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8231        sig.make_sig('beta1_power', dtype=sig.sig_dtype.T),
8232        sig.make_sig('beta2_power', dtype=sig.sig_dtype.T),
8233        sig.make_sig('lr', dtype=sig.sig_dtype.T),
8234        sig.make_sig('beta1', dtype=sig.sig_dtype.T),
8235        sig.make_sig('beta2', dtype=sig.sig_dtype.T),
8236        sig.make_sig('epsilon', dtype=sig.sig_dtype.T),
8237        sig.make_sig('grad', dtype=sig.sig_dtype.T)
8238    )
8239
8240    @prim_attr_register
8241    def __init__(self, use_locking=False):
8242        """Initialize ApplyAdamWithAmsgradV2"""
8243        validator.check_value_type("use_locking", use_locking, [bool], self.name)
8244        self.add_prim_attr("side_effect_mem", True)
8245
8246
8247class FractionalMaxPool(Primitive):
8248    r"""
8249    Performs fractional max pooling on the input.
8250
8251    Fractional max pooling is similar to regular max pooling, but with the added flexibility of
8252    allowing the overall reduction ratio `N` to be a non-integer value. In regular max pooling,
8253    an input set is reduced in size by taking the maximum value of  `N x N` (usually 2x2)
8254    subsections of the set, with the goal of reducing the set by a factor of `N`, where `N` is an integer.
8255
8256    In contrast, fractional max pooling uses randomly generated pool sizes that are fairly uniform in size.
8257
8258    .. warning::
8259        "pooling_ratio" currently only supports the row and col dimensions and its values should be >= 1.0.
8260        The first and last elements must be 1.0 because pooling on the batch and channels dimensions is not allowed.
8261
8262    Args:
8263        pooling_ratio (list(float)): Decides the shape of the output. It is a list of float numbers with
8264            length >= 4. The pooling ratio for each dimension should not be less than 0; currently only the
8265            row and col dimensions are supported.
8266        pseudo_random(bool, optional): Generate the pooling sequence either randomly or pseudo-randomly.
8267            If the pseudo_random parameter is set to ``True`` , the sequence will be generated in a
8268            pseudo-random fashion, otherwise it will be generated randomly.
8269            Refer to `Fractional Max-Pooling  <https://arxiv.org/pdf/1412.6071>`_
8270            by Benjamin Graham to understand the distinction between the two.
8271            Default: ``False`` .
8272        overlapping(bool, optional): When set to ``True`` , the values at the boundary of adjacent pooling cells
8273            will be shared by both cells during pooling process. When set to ``False`` , the values are not reused.
8274            Default: ``False`` .
8275        deterministic(bool, optional): If deterministic is set to ``True`` , a fixed pooling region will be used
8276            in the computation graph, ensuring that the FractionalMaxPool is deterministic.
8277            This is often used in unit tests. When set to ``False`` , fixed pool regions will not be used.
8278            Default: ``False`` .
8279        seed(int, optional): If either seed or seed2 are set to a non-zero value, the random number
8280            generator will be seeded using the specified seed. If neither seed nor seed2 are set,
8281            the generator will be seeded by a random seed.
8282            Default: ``0`` .
8283        seed2(int, optional): The second seed to avoid seed collision.
8284            Default: ``0`` .
8285
8286    Inputs:
8287        - **x** (Tensor) -The data type must be one of the following types: float32, float64, int32, int64.
8288          Tensor of shape :math:`(N, H_{in}, W_{in}, C_{in})`.
8289
8290    Outputs:
8291        - **y** (Tensor) - the output of FractionalMaxPool, has the same data type with `x`.
8292          Tensor of shape :math:`(N, H_{out}, W_{out}, C_{out})`.
8293
8294        - **row_pooling_sequence** (Tensor) - A tensor of type int64, the result list of pool boundary rows.
8295
8296        - **col_pooling_sequence** (Tensor) - A tensor of type int64, the result list of pool boundary cols.
8297
8298    Raises:
8299        TypeError: If data type of `x` is not float32, float64, int32, int64.
8300        TypeError: If `x` is not a 4D tensor.
8301        ValueError: If element of `x` equals 0 or is less than 0.
8302        ValueError: If `pooling_ratio` is a list whose length is not equal to 4.
8303        ValueError: If the first and last element of `pooling_ratio` is not equal to 1.0.
8304
8305    Supported Platforms:
8306        ``Ascend`` ``GPU`` ``CPU``
8307
8308    Examples:
8309        >>> x = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]).reshape([1,4,4,1]).astype(np.int64)
8310        >>> pooling_ratio=[1.0,1.5,1.5,1.0]
8311        >>> fractionalmaxpool_op = ops.FractionalMaxPool(pooling_ratio=pooling_ratio)
8312        >>> output = fractionalmaxpool_op(Tensor(x))
8313        >>> print(output)
8314        (Tensor(shape=[1, 2, 2, 1], dtype=Int64, value=
8315        [[[[ 6],
8316           [ 8]],
8317          [[14],
8318           [16]]]]), Tensor(shape=[3], dtype=Int64, value= [0, 2, 4]), Tensor(shape=[3], dtype=Int64, value= [0, 2, 4]))
8319    """
8320
8321    @prim_attr_register
8322    def __init__(self, pooling_ratio, pseudo_random=False, overlapping=False, deterministic=False, seed=0, seed2=0):
8323        """Initialize FractionalMaxPool."""
8324        self.init_prim_io_names(inputs=["x"], outputs=["y", "row_pooling_sequence", "col_pooling_sequence"])
8325        validator.check_value_type('pooling_ratio', pooling_ratio, [list], self.name)
8326        for item in pooling_ratio:
8327            validator.check_value_type("pooling_ratio_item", item, float, self.name)
8328        validator.check_value_type("pseudo_random", pseudo_random, [bool], self.name)
8329        validator.check_value_type("overlapping", overlapping, [bool], self.name)
8330        validator.check_value_type("deterministic", deterministic, [bool], self.name)
8331        validator.check_value_type("seed", seed, [int], self.name)
8332        validator.check_value_type("seed2", seed2, [int], self.name)
8333
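# --- Illustrative sketch (not part of the MindSpore API) ---------------------
# A minimal NumPy illustration of how the returned row/col pooling sequences
# define the pooling cells in the non-overlapping case: output cell (i, j)
# covers input rows rows[i]:rows[i + 1] and cols cols[j]:cols[j + 1] in NHWC
# layout. `pool_from_sequences` is a hypothetical helper, not the kernel.
import numpy as np


def pool_from_sequences(x_nhwc, rows, cols):
    n, _, _, c = x_nhwc.shape
    y = np.empty((n, len(rows) - 1, len(cols) - 1, c), dtype=x_nhwc.dtype)
    for i in range(len(rows) - 1):
        for j in range(len(cols) - 1):
            cell = x_nhwc[:, rows[i]:rows[i + 1], cols[j]:cols[j + 1], :]
            y[:, i, j, :] = cell.max(axis=(1, 2))
    return y


# With the docstring example (rows = cols = [0, 2, 4] on the 4x4 grid 1..16),
# this reproduces the pooled values [[6, 8], [14, 16]].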
8334
8335class FractionalMaxPool3DWithFixedKsize(Primitive):
8336    r"""
8337    Applies a 3D fractional max pooling to an input signal composed of multiple input planes.
8338    The max-pooling operation is applied in :math:`(kD, kH, kW)` regions by a stochastic step size determined by
8339    the target output size `output_shape`.
8340
8341    The number of output features is equal to the number of input planes.
8342
8343    Refer to the paper `Fractional MaxPooling by Ben Graham <https://arxiv.org/abs/1412.6071>`_  for more details.
8344
8345    The input and output data format can be "NCDHW" and "NDHWC". N is the batch size, C is the number of channels,
8346    D the feature depth, H is the feature height, and W is the feature width.
8347
8348    .. warning::
8349        This is an experimental API that is subject to change or deletion.
8350
8351    Args:
        ksize (Union[int, tuple]): Size of the pooling window. `ksize` can be a tuple of three values that
            specify a shape :math:`(k_D, k_H, k_W)`, or a single int `K` for :math:`(K, K, K)`.
        output_shape (Union[int, tuple]): The target output shape. `output_shape` can be a tuple of three values
            that specify a shape :math:`(D_{out}, H_{out}, W_{out})`, or a single int `S` for :math:`(S, S, S)`.
        data_format (str, optional): The optional value for data format.
            Currently support ``'NCDHW'`` and ``'NDHWC'`` . Default: ``'NCDHW'`` .
8358
8359    Inputs:
8360        - **x** (Tensor) - The input of FractionalMaxPool3DWithFixedKsize, which is a 4D or 5D tensor.
8361          Tensor of data type : float16, float32, double, int32, int64.
8362          Supported shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(N, D_{in}, H_{in}, W_{in}, C)`.
8363        - **random_samples** (Tensor) - The random step of FractionalMaxPool3DWithFixedKsize, which is a 3D tensor.
8364          Tensor of data type : float16, float32, double, and value is between (0, 1).
8365          Supported shape :math:`(N, C, 3)`
8366
8367    Outputs:
8368        - **y** (Tensor) - A tensor, the output of FractionalMaxPool3DWithFixedKsize.
8369          Has the same data type with `x`.
8370          Tensor of shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(N, D_{out}, H_{out}, W_{out}, C)`.
8371        - **argmax** (Tensor) - A tensor, the indices along with the outputs.
8372          Has the same shape as the `y` and int32 or int64 data type.
8373
8374    Raises:
        TypeError: If `x` is not a 4D or 5D tensor.
8376        TypeError: If `random_samples` is not a 3D tensor.
8377        TypeError: If data type of `x` is not float16, float32, double, int32, int64.
8378        TypeError: If dtype of `random_samples` is not float16, float32, double.
8379        TypeError: If dtype of `argmax` is not int32, int64.
8380        ValueError: If `output_shape` is a tuple and if `output_shape` length is not 3.
8381        ValueError: If `ksize` is a tuple and if `ksize` length is not 3.
8382        ValueError: If numbers in `output_shape` or `ksize` is not positive.
8383        ValueError: If `data_format` is neither 'NCDHW' nor 'NDHWC'.
        ValueError: If the first dimension size of `x` and `random_samples` is not equal.
        ValueError: If the second dimension size of `x` and `random_samples` is not equal.
8386        ValueError: If the third dimension size of `random_samples` is not 3.
8387
8388    Supported Platforms:
8389        ``Ascend`` ``GPU`` ``CPU``
8390
8391    Examples:
8392        >>> import numpy as np
8393        >>> from mindspore import Tensor, ops
8394        >>> from mindspore import dtype as mstype
8395        >>> x = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
8396        ...       .reshape([1, 1, 2, 2, 4]), mstype.float32)
8397        >>> random_samples = Tensor(np.array([0.7, 0.7, 0.7]).reshape([1, 1, 3]), mstype.float32)
8398        >>> ksize = (1, 1, 1)
8399        >>> output_shape = (1, 1, 2)
8400        >>> net = ops.FractionalMaxPool3DWithFixedKsize(ksize = ksize, output_shape = output_shape)
8401        >>> output, argmax = net(x, random_samples)
8402        >>> print(output)
8403        [[[[[13. 16.]]]]]
8404        >>> print(argmax)
8405        [[[[[12 15]]]]]
8406    """
8407
8408    @prim_attr_register
8409    def __init__(self, ksize, output_shape, data_format="NCDHW"):
8410        """Initialize FractionalMaxPool3DWithFixedKsize."""
8411        self.init_prim_io_names(inputs=["x", "random_samples"], outputs=["y", "argmax"])
8412        validator.check_value_type("ksize", ksize, [int, tuple], self.name)
8413        self.ksize = ksize
8414        if isinstance(self.ksize, int):
8415            self.ksize = (ksize, ksize, ksize)
8416        if len(self.ksize) != 3:
            raise ValueError(f"For '{self.name}', attr 'ksize' must be a positive int number or a tuple of "
                             f"three positive int numbers, but got {len(self.ksize)} numbers.")
8419        for item in self.ksize:
8420            validator.check_positive_int(item, 'ksize item', self.name)
8421        self.output_shape = validator.check_value_type("output_shape", output_shape, [int, tuple], self.name)
8422        self.data_format = validator.check_string(data_format, ['NCDHW', 'NDHWC'], 'data_format', self.name)
8423        self.output_shape = _check_3d_int_or_tuple("output_shape", output_shape,
8424                                                   self.name, allow_five=False, ret_five=False)
8425        self.add_prim_attr("ksize", self.ksize)
8426        self.add_prim_attr("output_shape", self.output_shape)
8427
8428
8429class FractionalAvgPool(Primitive):
8430    r"""
8431    Performs fractional avg pooling on the input.
8432
8433    Fractional avg pooling is similar to regular avg pooling, but with the added flexibility of
8434    allowing the overall reduction ratio `N` to be a non-integer value. In regular avg pooling,
8435    an input set is reduced in size by taking the average value of  `N x N` (usually 2x2)
8436    subsections of the set, with the goal of reducing the set by a factor of `N`, where `N` is an integer.
8437
8438    .. warning::
        `pooling_ratio` currently only supports pooling on the row and col dimensions, and those ratios should be
        >= 1.0. The first and last elements must be 1.0 because pooling on the batch and channel dimensions is
        not allowed.
8441
8442    Args:
        pooling_ratio (list(float)): Decides the shape of output, is a list of floats that has length >= 4.
            The pooling ratio for each dimension should be >= 1.0, and pooling is currently only supported for
            the row and col dimensions. The first and last elements must be 1.0 because pooling on the batch and
            channel dimensions is not allowed.
8447        pseudo_random(bool, optional): Generate the pooling sequence either randomly or pseudo-randomly.
8448            If the pseudo_random parameter is set to ``True`` , the sequence will be generated in a
8449            pseudo-random fashion, otherwise it will be generated randomly.
8450            Refer to `Fractional Max-Pooling  <https://arxiv.org/pdf/1412.6071>`_
8451            by Benjamin Graham to understand the distinction between the two.
8452            Default: ``False`` .
8453        overlapping(bool, optional): When set to ``True`` , the values at the boundary of adjacent pooling cells
8454            will be shared by both cells during pooling process. When set to ``False`` , the values are not reused.
8455            Default: ``False`` .
8456        deterministic(bool, optional): If deterministic is set to ``True`` , a fixed pooling region will be used
8457            in the computation graph, ensuring that the FractionalAvgPool is deterministic.
8458            This is often used in unit tests. When set to ``False`` , fixed pool regions will not be used.
8459            Default: ``False`` .
8460        seed(int, optional): If either seed or seed2 are set to a non-zero value, the random number
8461            generator will be seeded using the specified seed. If neither seed nor seed2 are set,
8462            the generator will be seeded by a random seed.
8463            Default: ``0`` .
8464        seed2(int, optional): The second seed to avoid seed collision.
8465            Default: ``0`` .
8466
8467    Inputs:
        - **x** (Tensor) - The data type must be one of the following types: float32, float64, int32, int64.
8469          Tensor of shape :math:`(N, H_{in}, W_{in}, C_{in})`.
8470
8471    Outputs:
        - **y** (Tensor) - A tensor, the output of FractionalAvgPool, has the same data type as `x`.
8473          Tensor of shape :math:`(N, H_{out}, W_{out}, C_{out})`.
8474
8475        - **row_pooling_sequence** (Tensor) - A tensor of type int64, the result list of pool boundary rows.
8476
8477        - **col_pooling_sequence** (Tensor) - A tensor of type int64, the result list of pool boundary cols.
8478
8479    Raises:
8480        TypeError: If data type of `x` is not float32, float64, int32, int64.
8481        TypeError: If `x` is not a 4D tensor.
8482        ValueError: If element of `x` equals 0 or is less than 0.
8483        ValueError: If `pooling_ratio` is a list whose length is not equal to 4.
8484        ValueError: If the first and last element of `pooling_ratio` is not equal to 1.0.
8485
8486    Supported Platforms:
8487        ``Ascend`` ``GPU`` ``CPU``
8488
8489    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> x = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]).reshape([1,4,4,1]).astype(np.int64)
8491        >>> pooling_ratio=[1.0,1.5,1.5,1.0]
8492        >>> fractionalavgpool_op = ops.FractionalAvgPool(pooling_ratio=pooling_ratio)
8493        >>> output = fractionalavgpool_op(Tensor(x))
8494        >>> print(output)
8495        (Tensor(shape=[1, 2, 2, 1], dtype=Int64, value=
8496        [[[[ 3],
8497           [ 5]],
8498          [[11],
8499           [13]]]]), Tensor(shape=[3], dtype=Int64, value= [0, 2, 4]), Tensor(shape=[3], dtype=Int64, value= [0, 2, 4]))
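        >>> # Cross-check (a sketch): each output value is the average of one non-overlapping 2x2 block;
        >>> # the averages below appear truncated in the output above because it keeps the int64 dtype of `x`.
        >>> print(x[0, :, :, 0].reshape(2, 2, 2, 2).mean(axis=(1, 3)))
        [[ 3.5  5.5]
         [11.5 13.5]]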
8500    """
8501
8502    @prim_attr_register
8503    def __init__(self, pooling_ratio, pseudo_random=False, overlapping=False, deterministic=False, seed=0, seed2=0):
8504        """Initialize FractionalAvgPool."""
8505        self.init_prim_io_names(inputs=["x"], outputs=["y", "row_pooling_sequence", "col_pooling_sequence"])
8506        validator.check_value_type('pooling_ratio', pooling_ratio, [list], self.name)
8507        for item in pooling_ratio:
8508            validator.check_value_type("pooling_ratio_item", item, float, self.name)
8509        validator.check_value_type("pseudo_random", pseudo_random, [bool], self.name)
8510        validator.check_value_type("overlapping", overlapping, [bool], self.name)
8511        validator.check_value_type("deterministic", deterministic, [bool], self.name)
8512        validator.check_value_type("seed", seed, [int], self.name)
8513        validator.check_value_type("seed2", seed2, [int], self.name)
8514
8515
8516class NthElement(Primitive):
8517    r"""
8518    Computes the n-th smallest values for the last dimension of the input Tensor.
8519
8520    - When `input` is a 1-D Tensor (i.e. Vector), it finds the nth-smallest value in the vector
8521      and outputs its value as a scalar Tensor.
8522    - When `input` is matrices or has higher rank, it finds the nth-smallest value
8523      in each row (or vector along the last dimension) and outputs
8524      these values in a Tensor with shape of `values.shape = input.shape[:-1]`.
8525
8526    Args:
        reverse (bool, optional): An optional bool. If set to ``True`` , it finds the :math:`n`-th largest value
          in the vector instead of the :math:`n`-th smallest. Default: ``False`` .
8529
8530    Inputs:
8531        - **input** (Tensor) - Input Tensor with 1-D or higher dimension.
8532        - **n** (Union[int, Tensor]) -  If the `n` is a Tensor, it should be a 0-D Tensor, dtype is int32.
8533          Valid range of `n` is :math:`[0, input.shape[-1])` where :math:`input.shape[-1]` is
8534          last dimension size of `input`.
8535
8536    Outputs:
8537        - **values** (Tensor) - Its shape satisfies:  `values`.shape = `input`.shape[:-1].
8538          The dtype is the same as `input`.
8539
8540    Raises:
        TypeError: If the type of `input` is out of the valid list.
        TypeError: If `n` is not int32 or not a Tensor.
        ValueError: If `n` is out of :math:`[0, input.shape[-1])`.
8544
8545    Supported Platforms:
8546        ``Ascend`` ``GPU`` ``CPU``
8547
8548    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> from mindspore import dtype as mstype
        >>> input = Tensor(np.array([[1, 2, 3], [4, 5, 6]]), mstype.int8)
8550        >>> n = 1
8551        >>> net = ops.NthElement()
8552        >>> out = net(input, n)
8553        >>> print(out)
8554        [2 5]
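        >>> # Cross-check (a sketch): the n-th smallest value along the last axis via a plain sort.
        >>> print(np.sort(input.asnumpy(), axis=-1)[..., 1])
        [2 5]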
8555    """
8556
8557    @prim_attr_register
8558    def __init__(self, reverse=False):
8559        """Initialize NthElement."""
8560        self.reverse = validator.check_value_type("reverse", reverse, [bool], self.name)
8561        self.add_prim_attr("reverse", self.reverse)
8562        self.init_prim_io_names(inputs=['input', 'n'],
8563                                outputs=['output'])
8564
8565
8566class PSROIPooling(Primitive):
8567    r"""
8568    Applies Position Sensitive ROI-Pooling on input Tensor.
8569
8570    Args:
8571        spatial_scale (float): a scaling factor that maps the box coordinates to the input coordinates.
8572                               For example, if your boxes are defined on the scale of a 224x224 image and
8573                               your input is a 112x112 feature map (resulting from a 0.5x scaling of the original
8574                               image), you'll want to set this to 0.5.
        group_size (int): the size (height and width, in pixels) of the output after the pooling is performed.
8576        output_dim (int): the dim of the output after the pooling is performed.
8577
8578    Inputs:
8579        - **features** (Tensor) - The input features, whose shape must be :math:`(N, C, H, W)`. With data type is
8580          float16 or float32. This formula should hold: :math:`(C == output\_dim * group\_size * group\_size)`.
8581        - **rois** (Tensor) - The shape is `(batch, 5, rois_n)`. With data type of float16 or float32.
8582          The size of first dimension `batch` is batch_size. The size of the second dimension must be `5`.
          The size of the third dimension `rois_n` is the number of rois. Each roi has the form
          (index, x1, y1, x2, y2), where `index` is the batch index of the image the roi belongs to, and
          (x1, y1, x2, y2) are the box coordinates the region will be taken from. The coordinates must satisfy
          0 <= x1 < x2 and 0 <= y1 < y2.
8587
8588    Outputs:
8589        - **out** (Tensor) - The result after pooling. Its shape
8590          is :math:`(rois.shape[0] * rois.shape[2], output\_dim, group\_size, group\_size)`.
8591
8592    Raises:
8593        TypeError: If `spatial_scale` is not a float.
8594        TypeError: If `group_size` or `output_dim` is not an int.
8595        TypeError: If `features` or `rois` is not a Tensor.
8596        TypeError: If dtype of `rois` is not float16 or float32.
8597        ValueError: If shape of `features` does not satisfy :math:`(C == output\_dim * group\_size * group\_size)`.
8598        ValueError: If `spatial_scale` is negative.
8599
8600    Supported Platforms:
8601        ``Ascend``
8602
8603    Examples:
8604        >>> import mindspore
8605        >>> import numpy as np
8606        >>> from mindspore import Tensor, ops
8607        >>> features = np.random.randn(4, 3 * 7 * 7, 80, 48)
8608        >>> features = Tensor.from_numpy(features).astype(mindspore.float32)
8609        >>> rois = Tensor.from_numpy(
8610        ...     np.array([[[0.0000],
8611        ...                [150.3563],
8612        ...                [200.1320],
8613        ...                [579.3563],
8614        ...                [602.3452]],
8615        ...               [[1.0000],
8616        ...                [657.1263],
8617        ...                [302.8564],
8618        ...                [762.4214],
8619        ...                [567.9854]],
8620        ...               [[2.0000],
8621        ...                [321.3122],
8622        ...                [232.2410],
8623        ...                [679.0281],
8624        ...                [587.6346]],
8625        ...               [[3.0000],
8626        ...                [664.1630],
8627        ...                [387.4919],
8628        ...                [778.7322],
8629        ...                [562.7321]]])).astype(mindspore.float32)
8630        >>> psROIPooling = ops.PSROIPooling(spatial_scale=1.0/16, output_dim=3,
8631        ...                                       group_size=7)
8632        >>> out = psROIPooling(features, rois)
8633        >>> print(out.shape)
8634        (4, 3, 7, 7)
8635        >>> print(out.dtype)
8636        Float32
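        >>> # Shape cross-check using the formula above:
        >>> # (rois.shape[0] * rois.shape[2], output_dim, group_size, group_size)
        >>> print((rois.shape[0] * rois.shape[2], 3, 7, 7))
        (4, 3, 7, 7)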
8637    """
8638
8639    @prim_attr_register
8640    def __init__(self, spatial_scale, group_size, output_dim):
8641        """Initialize PSROIPooling"""
8642        validator.check_positive_float(spatial_scale, "spatial_scale", self.name)
8643        validator.check_positive_int(group_size, "group_size", self.name)
8644        validator.check_positive_int(output_dim, "output_dim", self.name)
8645        self.spatial_scale = spatial_scale
8646        self.group_size = group_size
8647        self.output_dim = output_dim
8648
8649        self.add_prim_attr('spatial_scale', self.spatial_scale)
8650        self.add_prim_attr('group_size', self.group_size)
8651        self.add_prim_attr('output_dim', self.output_dim)
8652
8653
8654class TripletMarginLoss(Primitive):
8655    r"""
8656    TripletMarginLoss operation.
8657
    Creates a criterion that measures the triplet loss given input
    tensors :math:`x1`, :math:`x2`, :math:`x3` and a margin with a value greater than :math:`0`.
    This is used for measuring a relative similarity between samples. A triplet
    is composed of `a`, `p` and `n` (i.e., `anchor`, `positive example` and `negative
    example` respectively). The shapes of all input tensors should be
    :math:`(N, D)`.
8664
8665    The distance swap is described in detail in the paper
8666    `Learning local feature descriptors with triplets and shallow convolutional neural
8667    networks <http://158.109.8.37/files/BRP2016.pdf>`_
8668    by V. Balntas, E. Riba et al.
8669
8670    The loss function for each sample in the mini-batch is:
8671
8672    .. math::
8673        L(a, p, n) = \max \{d(a_i, p_i) - d(a_i, n_i) + {\rm margin}, 0\}
8674
8675    where
8676
8677    .. math::
8678        d(x_i, y_i) = \left\lVert {\bf x}_i - {\bf y}_i \right\rVert_p
8679
8680    Args:
8681        p (int, optional): The norm degree for pairwise distance. Default: ``2`` .
        eps (float, optional): A small value added for numerical stability. Default: ``1e-6`` .
        swap (bool, optional): Whether to use the distance swap described in the paper above. Default: ``False`` .
8684        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
8685            ``'sum'`` . Default: ``'mean'`` .
8686
8687            - ``'none'``: no reduction will be applied.
8688            - ``'mean'``: compute and return the mean of elements in the output.
8689            - ``'sum'``: the output elements will be summed.
8690
8691    Inputs:
8692        - **x** (Tensor) - A sample randomly selected from the training set. Data type must be BasicType.
8693        - **positive** (Tensor) - A sample belonging to the same category as x,
8694          with the same type and shape as `x`.
8695        - **negative** (Tensor) - A sample belonging to the different class from x,
8696          with the same type and shape as `x`.
8697        - **margin** (Tensor) - Make a margin between the positive pair and the negative pair.
8698
8699    Outputs:
8700        Union[Tensor, Scalar], if `reduction` is ``"none"``, its shape is :math:`(N)`.
8701        Otherwise, a scalar value will be returned.
8702
8703    Raises:
8704        TypeError: If `x` or `positive` or `negative` or `margin` is not a Tensor.
8705        TypeError: If dtype of `x` or `positive` or `negative` is not BasicType.
8706        TypeError: If dtype of `x`, `positive` and `negative` is not the same.
8707        TypeError: If `margin` is not float32.
8708        TypeError: If `p` is not an int.
8709        TypeError: If `eps` is not a float.
8710        TypeError: If `swap` is not a bool.
8711        ValueError: If dimensions of input `x`, `positive` and `negative` are
8712          less than or equal to 1 at the same time.
8713        ValueError: If the dimension of input `x` or `positive` or `negative`
8714          is bigger than or equal to 8.
8715        ValueError: If length of shape of `margin` is not 0.
8716        ValueError: If shape of `x`, `positive` and `negative` cannot broadcast.
8717        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
8718
8719    Supported Platforms:
8720        ``GPU``
8721
8722    Examples:
8723        >>> import mindspore
8724        >>> import numpy as np
8725        >>> from mindspore import Tensor, ops
8726        >>> loss = ops.TripletMarginLoss()
8727        >>> x = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
8728        >>> positive = Tensor(np.array([[0.4, 0.6], [0.4, 0.6]]), mindspore.float32)
8729        >>> negative = Tensor(np.array([[0.2, 0.9], [0.3, 0.7]]), mindspore.float32)
8730        >>> margin = Tensor(1.0, mindspore.float32)
8731        >>> output = loss(x, positive, negative, margin)
8732        >>> print(output)
8733        0.8881968
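        >>> # Cross-check with plain NumPy (a sketch of the formula above; the small `eps` term is ignored):
        >>> d_ap = np.linalg.norm(x.asnumpy() - positive.asnumpy(), axis=-1)
        >>> d_an = np.linalg.norm(x.asnumpy() - negative.asnumpy(), axis=-1)
        >>> print(np.allclose(np.mean(np.maximum(d_ap - d_an + 1.0, 0.0)), output.asnumpy(), atol=1e-5))
        True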
8734    """
8735
8736    @prim_attr_register
8737    def __init__(self, p=2, swap=False, eps=1e-6, reduction="mean"):
8738        """Initialize TripletMarginLoss"""
8739        self.init_prim_io_names(inputs=['x', 'positive', 'negative', 'margin'], outputs=['y'])
8740        validator.check_value_type("p", p, [int], self.name)
8741        validator.check_value_type("swap", swap, [bool], self.name)
8742        validator.check_value_type("eps", eps, [float], self.name)
8743        self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
8744
8745
8746class DeformableOffsets(Primitive):
8747    r"""
8748    Computes the deformed convolution output with the expected input.
8749
8750    Refer to :func:`mindspore.ops.deformable_conv2d` for more details.
8751
8752    Supported Platforms:
8753        ``Ascend`` ``GPU`` ``CPU``
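
    Examples:
        >>> # A minimal construction sketch (illustrative only); see :func:`mindspore.ops.deformable_conv2d`
        >>> # for the expected layout of the `x` and `offsets` inputs.
        >>> import mindspore.ops.operations.nn_ops as nn_ops
        >>> deformable_offsets = nn_ops.DeformableOffsets(strides=(1, 1, 1, 1), pads=(0, 0, 0, 0), ksize=(3, 3))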
8754    """
8755
8756    @prim_attr_register
8757    def __init__(self,
8758                 strides,
8759                 pads,
8760                 ksize,
8761                 dilations=(1, 1, 1, 1),
8762                 data_format="NCHW",
8763                 deformable_groups=1,
8764                 modulated=True):
8765        """Initialize DeformableOffsets"""
8766        self.init_prim_io_names(inputs=['x', 'offsets'], outputs=['y'])
8767
8768        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'data_format', self.name)
8769        pos_c = 1
8770        if self.format == "NHWC":
8771            pos_c = 3
8772        self.add_prim_attr('format', self.format)
8773
8774        validator.check_size_and_element_type_of_tuple('strides', strides, 4, int, self.name)
8775        if strides[0] != 1 or strides[pos_c] != 1:
8776            raise ValueError(f"For '{self.name}', The N and C dimensions of 'strides' must be set to 1.")
8777        self.add_prim_attr('strides', strides)
8778
8779        validator.check_size_and_element_type_of_tuple('pads', pads, 4, int, self.name)
8780        self.add_prim_attr('pads', pads)
8781
8782        validator.check_size_and_element_type_of_tuple('kernel_size', ksize, 2, int, self.name)
8783        self.add_prim_attr('ksize', ksize)
8784
8785        validator.check_size_and_element_type_of_tuple('dilations', dilations, 4, int, self.name)
8786        if dilations[0] != 1 or dilations[pos_c] != 1:
8787            raise ValueError(f"For '{self.name}', The N and C dimensions of 'dilations' must be set to 1.")
8788        self.add_prim_attr('dilations', dilations)
8789
8790        self.deformable_groups = validator.check_positive_int(deformable_groups, 'deformable_groups', self.name)
8791        self.add_prim_attr('deformable_groups', self.deformable_groups)
8792
8793        self.modulated = validator.check_bool(modulated, 'modulated', self.name)
8794        if self.modulated is not True:
8795            raise ValueError(f"For '{self.name}', The modulated must be set to True.")
8796        self.add_prim_attr('modulated', self.modulated)
8797
8798
8799class Pdist(Primitive):
8800    r"""
8801    Computes the p-norm distance between each pair of row vectors in the input.
8802
8803    Refer to :func:`mindspore.ops.pdist` for more details.
8804
8805    Note:
        The pdist operator involves exponentiation, so inf or nan results may be produced when a float16
        input is used. A float32 input is recommended.
8808
8809    Args:
        p (float, optional): The order of norm distance, :math:`p \in [0, \infty)`. Default: ``2.0`` .
8811
8812    Inputs:
8813        - **x** (Tensor) - Input tensor. Supported dtypes: float16, float32 or float64.
8814
8815    Outputs:
8816        Tensor, has the same dtype as `x`.
8817
8818    Supported Platforms:
8819        ``GPU`` ``CPU``
8820
8821    Examples:
8822        >>> from mindspore import Tensor, ops
8823        >>> import numpy as np
8824        >>> x = Tensor(np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]).astype(np.float32))
8825        >>> op = ops.Pdist(p=2.0)
8826        >>> y = op(x)
8827        >>> print(y)
8828        [1.4142135 2.828427  1.4142135]
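        >>> # Cross-check (a sketch): the output lists the pairwise 2-norm distances (0,1), (0,2), (1,2).
        >>> a = x.asnumpy()
        >>> ref = [np.linalg.norm(a[i] - a[j]) for i in range(3) for j in range(i + 1, 3)]
        >>> print(np.allclose(ref, y.asnumpy()))
        True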
8829    """
8830
8831    @prim_attr_register
8832    def __init__(self, p=2.0):
8833        """Initialize Pdist"""
8834        validator.check_value_type("p", p, [float], self.name)
8835        if p < 0:
8836            raise ValueError('Pdist p must be a non-negative value, but got `{}`.'.format(p))
8837        self.init_prim_io_names(inputs=['x'], outputs=['y'])
8838
8839
8840class SparseApplyAdagradDA(Primitive):
8841    r"""
8842    Update `var` according to the proximal adagrad scheme.
8843
8844    .. math::
        \begin{array}{ll} \\
            grad\_accum += grad \\
            grad\_squared\_accum += grad * grad \\
            tmp\_val =
                \begin{cases}
                    sign(grad\_accum) * \max\left\{|grad\_accum| - l1 * global\_step, 0\right\}, & \text{if } l1 > 0 \\
                    grad\_accum, & \text{otherwise}
                \end{cases} \\
            x\_value = -1 * lr * tmp\_val \\
            y\_value = l2 * global\_step * lr + \sqrt{grad\_squared\_accum} \\
            var = x\_value / y\_value
        \end{array}
8854
8855    Inputs of `var`, `grad_accum`, `grad_square_accum` and `grad`
8856    comply with the implicit type conversion rules to make the data types consistent.
8857    If they have different data types, lower priority data type will be converted to the
8858    relatively highest priority data type.
8859
8860    Args:
8861        use_locking (bool): If ``True`` , updating of the `var` and `accum` tensors will be protected by a lock.
8862                            Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
8863
8864    Inputs:
8865        - **var** (Parameter) - Variable to be updated.
8866          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
        - **grad_accum** (Parameter) - The accumulated gradients to be updated. Must have the same
          shape and dtype as `var`.
        - **grad_square_accum** (Parameter) - The accumulated squared gradients to be updated.
          Must have the same shape and dtype as `var`.
8871        - **grad** (Tensor) - A tensor of the same type as `var` and grad.shape[1:] = var.shape[1:] if rank(var) > 1.
8872        - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
8873          If there are duplicates in `indices`, the behavior is undefined. Must be one of the
8874          following types: int32, int64 and indices.shape[0] = grad.shape[0].
8875        - **lr** (Union[Number, Tensor]) - Scaling factor. Must be a scalar. Must have the same type as `var`.
8876        - **l1** (Union[Number, Tensor]) -  L1 regularization. Must be a scalar. Must have the same type as `var`.
8877        - **l2** (Union[Number, Tensor]) -  L2 regularization. Must be a scalar. Must have the same type as `var`.
8878        - **global_step** (Union[Number, Tensor]) - Training step number. Must be a scalar.
8879          Must be one of the following types: int32, int64.
8880
8881    Outputs:
8882        Tensor, with the same type and shape as 'var'.
8883
8884    Raises:
8885        TypeError: If `var`, `grad_accum`, `grad_square_accum` is not a Parameter.
8886        TypeError: If `grad` is not a Tensor.
8887        TypeError: If `lr`, `l1`, `l2` or `global_step` is neither a Number nor a Tensor.
8888        TypeError: If use_locking is not a bool.
        TypeError: If dtype of `grad_accum`, `grad_square_accum` or `grad`
                     is not the same as `var`.
8892        TypeError: If dtype of `indices` is neither int32 nor int64.
8893        TypeError: If shape of `indices` is not same as shape of first dimension of `grad`.
8894        TypeError: If dtype of `global_step` is not int64.
8895        ValueError: If the shape size of `lr`, `l1`, `l2` and `global_step` is not 0.
8896        RuntimeError: If the data type of `var`, `grad_accum`, `grad_square_accum` and `grad`
8897                      conversion of Parameter is not supported.
8898
8899    Supported Platforms:
8900        ``GPU`` ``CPU``
8901
8902    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, Parameter
        >>> from mindspore import dtype as mstype
        >>> import mindspore.ops.operations.nn_ops as nn_ops
        >>> var = Parameter(Tensor(np.array([[1,2], [1,2]]).astype(np.float32)))
8904        >>> grad_accum = Parameter(Tensor(np.array([[2,1], [3,1]]).astype(np.float32)))
8905        >>> grad_square_accum = Parameter(Tensor(np.array([[4,1], [5,1]]).astype(np.float32)))
8906        >>> grad = Tensor(np.array([[5,1], [6,1]]).astype(np.float32))
8907        >>> indices = Tensor(np.array([0, 1], dtype=np.int32))
8908        >>> lr = Tensor(2, mstype.float32)
8909        >>> l1 = Tensor(-1, mstype.float32)
8910        >>> l2 = Tensor(1, mstype.float32)
8911        >>> global_step=Tensor(1, mstype.int64)
8912        >>> sparse_apply_adagrad_da = nn_ops.SparseApplyAdagradDA()
8913        >>> output = sparse_apply_adagrad_da(var, grad_accum, grad_square_accum,
8914        ...                                  grad, indices, lr, l1, l2, global_step)
8915        >>> print(output)
8916        [[-1.8956923 -1.1715728]
8917         [-2.1420605 -1.1715728]]
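        >>> # Cross-check with plain NumPy (a sketch of the update above; l1 < 0 so tmp_val is just grad_accum,
        >>> # and l2 * global_step * lr = 2):
        >>> acc = np.array([[2., 1.], [3., 1.]]) + np.array([[5., 1.], [6., 1.]])
        >>> sq = np.array([[4., 1.], [5., 1.]]) + np.array([[5., 1.], [6., 1.]]) ** 2
        >>> print(np.allclose(-2 * acc / (2 + np.sqrt(sq)), output.asnumpy(), atol=1e-5))
        True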
8918    """
8919
8920    __mindspore_signature__ = (
8921        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8922        sig.make_sig('grad_accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8923        sig.make_sig('grad_square_accum', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
8924        sig.make_sig('grad', dtype=sig.sig_dtype.T),
8925        sig.make_sig('indices', dtype=sig.sig_dtype.T1),
8926        sig.make_sig('lr', dtype=sig.sig_dtype.T),
8927        sig.make_sig('l1', dtype=sig.sig_dtype.T),
8928        sig.make_sig('l2', dtype=sig.sig_dtype.T),
8929        sig.make_sig('global_step', dtype=sig.sig_dtype.T2)
8930    )
8931
8932    @prim_attr_register
8933    def __init__(self, use_locking=False):
8934        """Initialize SparseApplyAdagradDA"""
8935        self.init_prim_io_names(inputs=['var', 'grad_accum', 'grad_square_accum',
8936                                        'grad', 'indices', 'lr', 'l1', 'l2', 'global_step'],
8937                                outputs=['var'])
8938        validator.check_value_type("use_locking", use_locking, [bool], self.name)
8939
8940
8941class SparseApplyMomentum(Primitive):
8942    r"""
8943    Update relevant entries in '*var' and '*accum' according to the momentum scheme.
8944
8945    .. math::
8946        \begin{array}{ll} \\
8947            accum = accum * momentum + grad \\
8948            var -= lr * accum
8949        \end{array}
8950
8951    Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
8952    to make the data types consistent.
8953    If they have different data types, lower priority data type will be converted to
8954    the relatively highest priority data type.
8955
8956    Args:
        use_locking (bool): If ``True`` , updating of the `var` and `accum` tensors will be protected by a lock.
            Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
8959        use_nesterov (bool): If `True`, the tensor passed to compute grad will be var + momentum * accum,
8960            so in the end, the var you get is actually var + momentum * accum. Default: ``False`` .
8961
8962    Inputs:
8963        - **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32, int64,
8964          uint8, uint16, uint32, uint64, float16, float32 or float64.
8965          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
8966        - **accum** (Parameter) - Variable tensor to be updated, has the same shape and type as `var`.
8967        - **lr** (Union[Number, Tensor]) - The learning rate value. Must be a scalar with same type as `var`.
8968        - **grad** (Tensor) - A tensor for gradient, has the same type as `var`,
8969          and grad.shape[1:] = var.shape[1:] if rank(var) > 1.
8970        - **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
8971          If there are duplicates in `indices`, the behavior is undefined. Must be one of the
8972          following types: int32, int64 and indices.shape[0] = grad.shape[0].
8973        - **momentum** (Union[Number, Tensor]) - Momentum. Must be a scalar with same type as `var`.
8974
8975    Outputs:
8976        - **var** (Tensor) - Tensor, has the same shape and type as 'var'.
8977
8978    Raises:
8979        TypeError: If `var`, `accum`, `grad` or `indices` is not a Parameter.
8980        TypeError: If `lr`, `momentum` is neither a Number nor a Tensor.
8981        TypeError: If `use_locking` or `use_nesterov` is not a bool.
8982        TypeError: If dtype of `var`, `accum`, `lr`, `grad`, or `momentum` is not one of int8, int16,
8983                   int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64.
8984        TypeError: If dtype of `indices` is neither int32 nor int64.
8985        ValueError: If the shape of `var`, `accum` or `grad` is rank 0.
8986        ValueError: If shape of `accum` or `grad` is not same as `var`.
8987        ValueError: If shape of `indices` is not same as the shape of first dimension of `grad`.
8988        ValueError: If the shape of `lr` or `momentum` is not rank 0.
8989        RuntimeError: If the data type of `var`, `accum`, `lr`, `grad` and 'momentum' conversion of Parameter
8990                      is not supported.
8991
8992    Supported Platforms:
8993        ``GPU`` ``CPU``
8994
8995    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor
        >>> from mindspore import dtype as mstype
        >>> import mindspore.ops.operations.nn_ops as nn_ops
8997        >>> var = Tensor(np.array([[4.1, 7.2], [1.1, 3.0]]).astype(np.float32))
8998        >>> accum = Tensor(np.array([[2.2, 3.0], [3.1, 0.5]]).astype(np.float32))
8999        >>> lr = Tensor(0.01, mstype.float32)
9000        >>> grad = Tensor(np.array([[0.3, 0.2], [0.4, 0.1]]).astype(np.float32))
9001        >>> indices = Tensor(np.array([0, 1]), mstype.int32)
9002        >>> momentum = Tensor(0.99, mstype.float32)
9003        >>> sparse_apply_momentum = nn_ops.SparseApplyMomentum()
9004        >>> output = sparse_apply_momentum(var, accum, lr, grad, indices, momentum)
9005        >>> print(output)
9006        [[4.07522   7.1682997]
9007         [1.06531   2.99405  ]]
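        >>> # Cross-check with plain NumPy (a sketch of the update above; indices [0, 1] cover every row):
        >>> acc = np.array([[2.2, 3.0], [3.1, 0.5]]) * 0.99 + np.array([[0.3, 0.2], [0.4, 0.1]])
        >>> ref = np.array([[4.1, 7.2], [1.1, 3.0]]) - 0.01 * acc
        >>> print(np.allclose(ref, output.asnumpy(), atol=1e-5))
        True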
9008    """
9009
9010    __mindspore_signature__ = (
9011        sig.make_sig('var', dtype=sig.sig_dtype.T),
9012        sig.make_sig('accum', dtype=sig.sig_dtype.T),
9013        sig.make_sig('lr', dtype=sig.sig_dtype.T),
9014        sig.make_sig('grad', dtype=sig.sig_dtype.T),
9015        sig.make_sig('indices', dtype=sig.sig_dtype.T1),
9016        sig.make_sig('momentum', dtype=sig.sig_dtype.T)
9017    )
9018
9019    @prim_attr_register
9020    def __init__(self, use_locking=False, use_nesterov=False):
9021        """Initialize SparseApplyMomentum"""
9022        self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'grad', 'indices', 'momentum'],
9023                                outputs=['var'])
9024        validator.check_value_type("use_locking", use_locking, [bool], self.name)
9025        validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name)
9026
9027
9028class SparseApplyProximalGradientDescent(Primitive):
9029    r"""
9030    Sparse update '*var' as FOBOS algorithm with fixed learning rate.
9031
9032    .. math::
9033        \begin{array}{ll} \\
            \text{prox_v} = var - alpha * grad \\
9035            var = sign(\text{prox_v})/(1 + alpha * l2) * \max(\left| \text{prox_v} \right| - alpha * l1,0)
9036        \end{array}
9037
    Inputs of `var` and `grad` comply with the implicit type conversion rules to make the data types consistent.
9039    If they have different data types, the lower priority data type will be converted to
9040    the relatively highest priority data type.
9041
9042    Args:
        use_locking (bool): If ``True`` , updating of the `var` tensor will be protected by a lock.
            Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
9045
9046    Inputs:
9047        - **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32, int64,
9048          uint8, uint16, uint32, uint64, float16, float32 or float64.
9049          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
9050        - **alpha** (Union[Number, Tensor]) - Scaling factor. Must be a scalar with same type as `var`.
9051        - **l1** (Union[Number, Tensor]) - L1 regularization. Must be a scalar with same type as `var`.
9052        - **l2** (Union[Number, Tensor]) - l2 regularization. Must be a scalar with same type as `var`.
9053        - **grad** (Tensor) - A tensor for gradient, has the same type as `var`,
9054          and grad.shape[1:] = var.shape[1:] if rank(var) > 1.
        - **indices** (Tensor) - A tensor of indices into the first dimension of `var`.
9056          If there are duplicates in `indices`, the behavior is undefined. Must be one of the
9057          following types: int32, int64 and indices.shape[0] = grad.shape[0].
9058
9059    Outputs:
9060        - **var** (Tensor) - Tensor, has the same shape and type as 'var'.
9061
9062    Raises:
        TypeError: If `var`, `grad` or `indices` is not a Parameter.
9064        TypeError: If `alpha`, `l1`, `l2` is neither a Number nor a Tensor.
9065        TypeError: If `use_locking` is not a bool.
9066        TypeError: If dtype of `var`, `alpha`, `l1`, `l2` or `grad` is not one of int8, int16,
9067                   int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64.
9068        TypeError: If dtype of `indices` is neither int32 nor int64.
9069        ValueError: If the shape of `var` or `grad` is rank 0.
9070        ValueError: If shape of `grad` is not same as `var`.
9071        ValueError: If the shape of `alpha`, `l1` or `l2` is not rank 0.
9072        ValueError: If shape of `indices` is not same as the shape of first dimension of `grad`.
9073        RuntimeError: If the data type of `var`, `alpha`, `l1`, `l2`, `grad` conversion of Parameter
9074                      is not supported.
9075
9076    Supported Platforms:
9077        ``GPU`` ``CPU``
9078
9079    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor
        >>> from mindspore import dtype as mstype
        >>> import mindspore.ops.operations.nn_ops as nn_ops
9081        >>> var = Tensor(np.array([[4.1, 7.2], [1.1, 3.0]]).astype(np.float32))
9082        >>> alpha = Tensor(1.0, mstype.float32)
9083        >>> l1 = Tensor(1.0, mstype.float32)
9084        >>> l2 = Tensor(0.0, mstype.float32)
9085        >>> grad = Tensor(np.array([[1, 1], [1, 1]]).astype(np.float32))
9086        >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
9087        >>> sparse_apply_proximal_gradient_descent = nn_ops.SparseApplyProximalGradientDescent()
9088        >>> output = sparse_apply_proximal_gradient_descent(var, alpha, l1, l2, grad, indices)
9089        >>> print(output)
9090        [[2.1 5.2]
9091         [0.  1. ]]
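        >>> # Cross-check with plain NumPy (a sketch of the FOBOS update above, with alpha=1.0, l1=1.0, l2=0.0):
        >>> prox_v = np.array([[4.1, 7.2], [1.1, 3.0]]) - 1.0 * np.array([[1., 1.], [1., 1.]])
        >>> ref = np.sign(prox_v) / (1 + 1.0 * 0.0) * np.maximum(np.abs(prox_v) - 1.0 * 1.0, 0)
        >>> print(np.allclose(ref, output.asnumpy(), atol=1e-5))
        True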
9092    """
9093
9094    __mindspore_signature__ = (
9095        sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
9096        sig.make_sig('alpha', dtype=sig.sig_dtype.T),
9097        sig.make_sig('l1', dtype=sig.sig_dtype.T),
9098        sig.make_sig('l2', dtype=sig.sig_dtype.T),
9099        sig.make_sig('grad', dtype=sig.sig_dtype.T),
9100        sig.make_sig('indices', dtype=sig.sig_dtype.T1)
9101    )
9102
9103    @prim_attr_register
9104    def __init__(self, use_locking=False):
9105        """Initialize SparseApplyProximalGradientDescent."""
9106        self.init_prim_io_names(inputs=['var', 'alpha', 'l1', 'l2', 'grad', 'indices'],
9107                                outputs=['var'])
9108        validator.check_value_type("use_locking", use_locking, [bool], self.name)
9109
9110
9111class NuclearNorm(Primitive):
9112    r"""
9113    Returns the matrix nuclear norm of a given Tensor.
9114
    Attr `dim` specifies which two dimensions of the input `x` to calculate the nuclear norm across. If `dim` is None,
    the nuclear norm will be calculated across all dimensions of the input. Because the nuclear norm is the sum of
    the singular values of a matrix, the input must be 2-dimensional in this case: for a 2-dimensional input, the
    nuclear norm of that matrix is computed and `dim` should be None. If `dim` is set, it needs to be in the proper
    range, otherwise it won't work. If the input is 3-dimensional or above, the attribute `dim` is required; it
    specifies which two dimensions of the input to calculate the nuclear norm across.
9121
9122    According to the `dim` list, the input Tensor is reordered by `dim`. The two dimensions pointed to by the attribute
9123    `dim` are placed at the end, and the order of the other dimensions is relatively unchanged. Perform the SVD of each
9124    slice of the adjusted Tensor to obtain the singular value. Sum all of the singular value of each slice/matrix to
9125    obtain the nuclear norm.
9126
9127    Args:
9128        dim (Union[list(int), tuple(int)], optional): Specifies which two
9129            dimensions of `x` to calculate the matrix nuclear norm
9130            across. If `dim` is None, the nuclear norm will be calculated across all dimensions of `x`. The length of
9131            `dim` should be 2. The value in `dim` should be in this range:[-x_rank, x_rank). x_rank is the dimension of
9132            Tensor `x`. The value of `dim[0]` or `dim[1]` can not point to the same dimension. Default: ``None`` .
9133        keepdim (bool, optional): Whether the output Tensor have `dim` retained or not. Default: ``False`` .
9134
9135    Inputs:
9136        - **x** (Tensor) - Input to compute the matrix nuclear norm. The dimension of `x` should be greater than or
9137          equal to 2. Data type must be float32 or float64.
9138
9139    Outputs:
9140        Tensor, output Tensor with dimensions in `dim` reduced to 1 will be returned if `keepdim` is `True`;
9141        otherwise a Tensor with dimensions in `dim` removed is returned. The data type is same as `x`.
9142
9143    Raises:
9144        TypeError: If `x` is not a Tensor.
9145        TypeError: If dtype of `x` is neither float32 nor float64.
9146        TypeError: If dtype of `dim` is neither list(int) nor tuple(int).
9147        TypeError: If dtype of `keepdim` is not bool.
9148        ValueError: If dimension of Tensor `x` is less than 2.
9149        ValueError: If the length of `dim` is not 2 when `dim` is set.
9150        ValueError: If the dimension of Tensor `x` is not 2 when `dim` is not set.
9151        ValueError: If `dim[0]` or `dim[1]` point to the same dimension.
9152        ValueError: If `dim[0]` or `dim[1]` is not in this range:[-x_rank, x_rank).
9153                    x_rank is the dimension of Tensor `x`.
9154
9155    Supported Platforms:
9156        ``Ascend`` ``CPU``
9157
9158    Examples:
        >>> import numpy as np
        >>> import mindspore as ms
        >>> from mindspore import Tensor
        >>> import mindspore.ops.operations.nn_ops as nn_ops
        >>> input_x = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
9160        ...                           [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]]]), ms.float32)
9161        >>> dim = [0, 2]
9162        >>> keepdim = True
9163        >>> nuclearnorm = nn_ops.NuclearNorm(dim = dim,keepdim = keepdim)
9164        >>> output = nuclearnorm(input_x)
9165        >>> print(output)
9166        [[[15.407588]
9167        [21.711605]]]
9168        >>> keepdim = False
9169        >>> nuclearnorm = nn_ops.NuclearNorm(dim = dim,keepdim = keepdim)
9170        >>> output = nuclearnorm(input_x)
9171        >>> print(output)
9172        [15.407588 21.711605]
9173        >>> dim = [0, 1]
9174        >>> keepdim = True
9175        >>> nuclearnorm = nn_ops.NuclearNorm(dim = dim,keepdim = keepdim)
9176        >>> output = nuclearnorm(input_x)
9177        >>> print(output)
9178        [[[14.212674 15.81139  17.492853]]]
9179        >>> keepdim = False
9180        >>> nuclearnorm = nn_ops.NuclearNorm(dim = dim,keepdim = keepdim)
9181        >>> output = nuclearnorm(input_x)
9182        >>> print(output)
9183        [14.212674 15.81139  17.492853]
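        >>> # Cross-check (a sketch): for dim=[0, 1], each slice input_x[:, :, i] is a 2x2 matrix and the
        >>> # corresponding output value is the sum of its singular values.
        >>> a = input_x.asnumpy()
        >>> ref = [np.linalg.svd(a[:, :, i], compute_uv=False).sum() for i in range(3)]
        >>> print(np.allclose(ref, output.asnumpy(), atol=1e-4))
        True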
9184    """
9185
9186    @prim_attr_register
9187    def __init__(self, dim=None, keepdim=False):
9188        """Initialize NuclearNorm."""
9189        validator.check_value_type("dim", dim, [list, tuple, type(None)], self.name)
9190        if dim is not None:
9191            validator.check_int(len(dim), 2, validator.EQ, 'length of dim_size', self.name)
9192            validator.check_is_int(dim[0], "dim[0]", self.name)
9193            validator.check_is_int(dim[1], "dim[1]", self.name)
9194        else:
9195            self.add_prim_attr('dim', [1000])
9196        validator.check_value_type("keepdim", keepdim, [bool], self.name)
9197
9198
9199class GLU(Primitive):
9200    r"""
9201    Computes GLU (Gated Linear Unit activation function) of input tensors.
9202
9203    .. warning::
9204        This is an experimental API that is subject to change or deletion.
9205
9206    Refer to :func:`mindspore.ops.glu` for more details.
9207
9208    Args:
9209        axis (int, optional): Axis on which to split the input.
9210            The value of `axis` must be an int within range [-rank(`x`), rank(`x`)).
9211            Default: ``-1`` , specifying the last dimension.
9212
9213    Inputs:
9214        - **x** (Tensor) - Input tensor. `x.shape[axis]` must be even.
9215
9216    Outputs:
        Tensor, has the same data type as `x`.
9218
9219    Supported Platforms:
9220        ``Ascend`` ``CPU``
9221
9222    Examples:
9223        >>> from mindspore import ops, Tensor
9224        >>> from mindspore import dtype as mstype
9225        >>> import numpy as np
9226        >>> axis = 0
9227        >>> x = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698,
9228        ...                            0.5135, 0.5740, 0.3435, 0.1895, 0.8764,
9229        ...                            0.4980, 0.9673, 0.9879, 0.6988, 0.9022,
9230        ...                            0.9304, 0.1558, 0.0153, 0.1559, 0.9852]).reshape([2, 2, 5]), mstype.float32)
9231        >>> glu = ops.GLU(axis=axis)
9232        >>> y = glu(x)
9233        >>> print(y)
9234        [[[0.20028052 0.6916126  0.57412136 0.06512236 0.26307625]
9235          [0.3682598  0.3093122  0.17306386 0.10212085 0.63814086]]]
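        >>> # Cross-check (a sketch): along axis 0, GLU splits x into halves a, b and returns a * sigmoid(b).
        >>> a, b = x.asnumpy()[0], x.asnumpy()[1]
        >>> print(np.allclose(a * (1.0 / (1.0 + np.exp(-b))), y.asnumpy(), atol=1e-6))
        True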
9236    """
9237
9238    @prim_attr_register
9239    def __init__(self, axis=-1):
9240        """Initialize GLU"""
9241        validator.check_value_type("axis", axis, [int], self.name)
9242
9243
9244class FractionalMaxPoolWithFixedKsize(Primitive):
9245    r"""
9246    Applies a 2D fractional max pooling to an input signal composed of multiple input planes.
9247    The max-pooling operation is applied in :math:`(kH, kW)` regions by a stochastic step size determined by
9248    the target output size `output_shape`.
9249
9250    The number of output features is equal to the number of input planes.
9251
9252    Fractional MaxPooling is described in the paper `Fractional Max-Pooling <https://arxiv.org/pdf/1412.6071>`_.
9253
9254    Args:
        ksize (Union[int, tuple[int]]): Size of the pooling window. `ksize` can be a tuple of two values
          that specify a shape :math:`(k_H, k_W)`, or a single int `K` for :math:`(K, K)`.
        output_shape (Union[int, tuple[int]]): The target output shape. `output_shape` can be a tuple of two
          values that specify a shape :math:`(H_{out}, W_{out})`, or a single int `S` for :math:`(S, S)`.
        data_format (str, optional): The optional value for data format. Currently only ``'NCHW'`` is supported.
            Default: ``'NCHW'`` .
9261
9262    Inputs:
9263        - **input_x** (Tensor) - Tensor of shape :math:`(N, C, H_{in}, W_{in})`,
9264          with float16, float32, float64, int32, int64 data type.
9265        - **random_samples** (Tensor) - Tensor of shape :math:`(N, C, 2)`.
9266          with float16, float32, float64 data type.
9267
9268    Outputs:
9269        - **y** (Tensor) - Has the same type as the `input_x`.
9270          Has the shape :math:`(N, C, H_{out}, W_{out})`.
        - **argmax** (Tensor) - A tensor whose data type must be int64. Has the same shape as `y`.
9272
9273    Raises:
9274        TypeError: If data type of `input_x` is not one of the following: float16, float32, float64, int32, int64.
9275        TypeError: If data type of `random_samples` is not one of the following: float16, float32, float64.
9276        ValueError: If `ksize` is not a number and `ksize` is not a tuple of length 2.
9277        ValueError: If `output_shape` is not a number and `output_shape` is not a tuple of length 2.
        ValueError: If the sum of `ksize` and `output_shape` minus 1 is larger than the
          corresponding dimension of `input_x`.
9280        ValueError: If the dimension of `random_samples` is not 3.
9281        ValueError: If the first dimension size of `input_x` and `random_samples` is not equal.
9282        ValueError: If the second dimension size of `input_x` and `random_samples` is not equal.
9283        ValueError: If the third dimension size of `random_samples` is not 2.
9284
9285    Supported Platforms:
9286        ``CPU``
9287
9288    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, ops
        >>> from mindspore import dtype as mstype
        >>> # the ksize is an int number and the output_shape is a tuple.
9290        >>> ksize = 2
9291        >>> output_shape = (2,2)
9292        >>> data_format = "NCHW"
9293        >>> input_x = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698,
9294        ...                            0.5135, 0.5740, 0.3435, 0.1895, 0.8764,
9295        ...                            0.9581, 0.4760, 0.9014, 0.8522, 0.3664,
9296        ...                            0.4980, 0.9673, 0.9879, 0.6988, 0.9022,
9297        ...                            0.9304, 0.1558, 0.0153, 0.1559, 0.9852]).reshape([1, 1, 5, 5]), mstype.float32)
9298        >>> random_samples = Tensor(np.array([[[0.8, 0.8]]]), mstype.float32)
9299        >>> net = ops.FractionalMaxPoolWithFixedKsize(ksize, output_shape, data_format)
9300        >>> y, argmax = net(input_x, random_samples)
9301        >>> print(y)
9302        [[[[0.9545 0.8764]
9303           [0.9673 0.9852]]]]
9304        >>> print(argmax)
9305        [[[[ 1  9]
9306           [16 24]]]]
9307    """
9308
9309    @prim_attr_register
9310    def __init__(self, ksize, output_shape, data_format="NCHW"):
9311        """Initialize FractionalMaxPoolWithFixedKsize."""
9312        validator.check_value_type('ksize', ksize, [int, tuple], self.name)
9313        self.ksize = _check_positive_int_or_tuple(
9314            "ksize", ksize, self.name, allow_four=False, ret_four=False)
9315        self.add_prim_attr("ksize", self.ksize)
9316        validator.check_value_type('output_shape', output_shape, [int, tuple], self.name)
9317        self.output_shape = _check_positive_int_or_tuple(
9318            "output_shape", output_shape, self.name, allow_four=False, ret_four=False)
9319        self.add_prim_attr("output_shape", self.output_shape)
9320        self.data_format = validator.check_string(data_format, ['NCHW'], 'data_format', self.name)
9321        self.init_prim_io_names(inputs=['input_x', 'random_samples'], outputs=['y', 'argmax'])
9322
9323
9324class ChannelShuffle(Primitive):
9325    r"""
    Divides the channels in a tensor of shape :math:`(*, C, H, W)` into :math:`g` groups and
    rearranges them as :math:`(*, \frac{C}{g}, g, H*W)`, while keeping the original tensor shape.
9328
9329    .. warning::
9330        This is an experimental API that is subject to change or deletion.
9331
9332    Refer to :func:`mindspore.ops.channel_shuffle` for more detail.
9333
9334    Args:
9335        group (int): Number of group to divide channels in.
9336
9337    Inputs:
9338        - **x** (Tensor) - Tensor to be divided, it has shape :math:`(*, C, H, W)`,
9339          with float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 data type.
9340
9341    Outputs:
9342        A Tensor, has the same type as the `x`, and has the shape :math:`(*, C, H, W)`.
9343
9344    Supported Platforms:
9345        ``Ascend`` ``CPU``
9346
9347    Examples:
9348        >>> import numpy as np
9349        >>> from mindspore import Tensor, ops
9350        >>> group = 2
9351        >>> x = Tensor(np.arange(1 * 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16))
9352        >>> channel_shuffle_func = ops.ChannelShuffle(group)
9353        >>> y = channel_shuffle_func(x)
9354        >>> print(y)
9355        [[[[ 0  1]
9356           [ 2  3]]
9357           [[ 8  9]
9358           [10 11]]
9359           [[ 4  5]
9360           [ 6  7]]
9361           [[12 13]
9362           [14 15]]]]
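        >>> # Cross-check (a sketch): channel shuffle is a reshape to (N, g, C//g, H, W), a swap of the two
        >>> # group axes, and a reshape back to (N, C, H, W).
        >>> ref = x.asnumpy().reshape(1, 2, 2, 2, 2).transpose(0, 2, 1, 3, 4).reshape(1, 4, 2, 2)
        >>> print((ref == y.asnumpy()).all())
        True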
9363    """
9364
9365    @prim_attr_register
9366    def __init__(self, group):
9367        """Initialize ChannelShuffle"""
9368        if not isinstance(group, int):
            raise ValueError(f"For '{self.name}', attr 'group' must be a positive int number")
9370        self.init_prim_io_names(inputs=['x'], outputs=['y'])
9371
9372
9373class MaxPoolWithArgmaxV2(Primitive):
9374    r"""
9375    Performs max pooling on the input Tensor and returns both max values and indices.
9376
9377    Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs
9378    regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
9379    :math:`(h_{ker}, w_{ker})` and stride :math:`(s_0, s_1)`, the operation is as follows:
9380
9381    .. math::
9382        \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
9383        \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
9384
9385    .. warning::
9386        This is an experimental API that is subject to change or deletion.
9387
9388    Args:
9389        kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and argmax
9390            value, is an int number that represents height and width of the kernel, or a tuple of
9391            two int numbers that represent height and width respectively.
9392        strides (Union[int, tuple[int]], optional): The distance of kernel moving, an int number that represents
9393            not only the height of movement but also the width of movement, or a tuple of two int numbers that
9394            represent height and width of movement respectively. Default: ``None`` , meaning that
9395            `strides = kernel_size`.
        pads (Union[int, tuple[int]], optional): An int number that represents the padding added to both
            the height and the width of the input, or a tuple of two int numbers that represent the padding
            along the height and width respectively.
            Default: ``0`` .
9400        dilation (Union[int, tuple[int]], optional): Control the stride of elements in the kernel. Default: ``(1, 1)`` .
9401        ceil_mode (bool, optional): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .
9402        argmax_type (mindspore.dtype, optional) : The dtype for argmax.
9403            Default: ``mstype.int64`` . [Disabled in Ascend.]
9404
9405    Inputs:
9406        - **x** (Tensor) - Tensor of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})` with data type of int8,
9407          int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32 or float64 in CPU and GPU,
9408          with that of float16 in Ascend.
9409
9410    Outputs:
9411        Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.
9412
9413        - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
9414          It has the same data type as `x`.
9415
9416          .. math::
9417              H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{pads[0]} - \text{dilation[0]}
9418               \times (\text{kernel_size[0]} - 1) - 1}{\text{strides[0]}} + 1\right\rfloor
9419
9420          .. math::
9421              W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{pads[1]} - \text{dilation[1]}
9422               \times (\text{kernel_size[1]} - 1) - 1}{\text{strides[1]}} + 1\right\rfloor
9423
9424        - **argmax** (Tensor) - Index corresponding to the maximum value.
          Data type is int32 or int64 on GPU and CPU, and uint16 on Ascend.
9426
9427    Raises:
9428        TypeError: If `x` is not a Tensor.
9429        ValueError: If length of shape of `x` is not equal to 4.
        TypeError: If `kernel_size`, `strides`, `pads` or `dilation` is not int or tuple.
9431        ValueError: If `kernel_size`, `strides` or `dilation` is less than 1.
9432        ValueError: If `pads` is less than 0.
9433        ValueError: If `pads` is more than half of `kernel_size`.
9434        ValueError: If `argmax_type` is not mindspore.int64 or mindspore.int32.
9435        TypeError: If `ceil_mode` is not bool.
9436
9437    Supported Platforms:
9438        ``Ascend`` ``GPU`` ``CPU``
9439
9440    Examples:
9441        >>> import mindspore
9442        >>> import numpy as np
9443        >>> from mindspore import Tensor, ops
9444        >>> x = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32)
9445        >>> maxpool_arg_v2_op = ops.MaxPoolWithArgmaxV2(kernel_size=(3, 2), strides=(2, 1))
9446        >>> output_tensor, argmax = maxpool_arg_v2_op(x)
9447        >>> print(output_tensor.shape)
9448        (20, 16, 24, 31)
9449        >>> print(argmax.shape)
9450        (20, 16, 24, 31)
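        >>> # A quick sanity check of the output-shape formula above with these arguments
        >>> # (kernel_size=(3, 2), strides=(2, 1), pads=0, dilation=(1, 1)):
        >>> import math
        >>> h_out = math.floor((50 + 2 * 0 - 1 * (3 - 1) - 1) / 2 + 1)
        >>> w_out = math.floor((32 + 2 * 0 - 1 * (2 - 1) - 1) / 1 + 1)
        >>> print(h_out, w_out)
        24 31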
9451    """
9452
9453    @prim_attr_register
9454    def __init__(self, kernel_size, strides=None, pads=0, dilation=(1, 1), ceil_mode=False, argmax_type=mstype.int64):
9455        """Initialize MaxPoolWithArgmaxV2."""
9456        self.init_prim_io_names(inputs=["x"], outputs=["output", "argmax"])
9457        validator.check_value_type("ceil_mode", ceil_mode, bool, self.name)
9458        self.ceil_mode = ceil_mode
9459        validator.check_value_type("argmax_type", argmax_type, [mstype.Type], self.name)
9460        argmax_type_valid_values = (mstype.int32, mstype.int64)
9461        validator.check_type_name("argmax_type", argmax_type, argmax_type_valid_values, self.name)
9462        if argmax_type == mstype.int32:
9463            self.add_prim_attr("argmax_type", 3)
9464        elif argmax_type == mstype.int64:
9465            self.add_prim_attr("argmax_type", 4)
9466        else:
9467            raise ValueError(
9468                f"For '{self.name}', the 'argmax_type' must be mstype.int32 or mstype.int64, but got {argmax_type}.")
9469        self.kernel_size = _check_positive_int_or_tuple("kernel_size", kernel_size, self.name, ret_four=True)
9470        if strides is None:
9471            strides = kernel_size
9472        self.strides = _check_positive_int_or_tuple("strides", strides, self.name, ret_four=True)
9473        self.pads = _check_positive_int_or_tuple("pads", pads, self.name, ret_four=True, strict_positive=False)
9474        self.dilation = _check_positive_int_or_tuple("dilation", dilation, self.name, ret_four=True)
9475        self.add_prim_attr("kernel_size", self.kernel_size)
9476        self.add_prim_attr("strides", self.strides)
9477        self.add_prim_attr("pads", self.pads)
9478        self.add_prim_attr("dilation", self.dilation)
9479        self.add_prim_attr("ceil_mode", self.ceil_mode)
9480
9481
9482class WKV(Primitive):
9483    r"""
    The WKV computation is similar to AFT (Zhai et al., 2021), except that W is a channel-wise vector multiplied
    by relative position rather than a pairwise matrix as in AFT. A vector U is also introduced to attend
    separately to the current token, in order to compensate for potential degeneration of W.
9487
9488    Inputs:
9489        - **w** (Tensor) - The time_first tensor with data type of float32.
9490          Input tensor of shape :math:`(hidden\_size,)`.
        - **u** (Tensor) - The time_decay tensor with data type of float32.
9492          Input tensor of shape :math:`(hidden\_size,)`.
9493        - **k** (Tensor) - The key tensor with data type of float32.
9494          Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
9495        - **v** (Tensor) - The value tensor with data type of float32.
9496          Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
        - **sp** (Tensor) - The states_p tensor with data type of float32.
          Input tensor of shape :math:`(batch\_size, hidden\_size)`.
9499        - **sq** (Tensor) - The states_q tensor with data type of float32.
9500          Input tensor of shape :math:`(batch\_size, hidden\_size)`.
9501        - **sm** (Tensor) - The states_m tensor with data type of float32.
9502          Input tensor of shape :math:`(batch\_size, hidden\_size)`.
9503
9504    Outputs:
9505        Tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
9506
9507    Supported Platforms:
9508        ``Ascend``
9509
9510    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor
        >>> from mindspore.ops.operations import nn_ops
9512        >>> b = 32
9513        >>> t = 2
9514        >>> c = 128
9515        >>> w = Tensor(np.random.randn(c).astype(np.float32))
9516        >>> u = Tensor(np.random.randn(c).astype(np.float32))
9517        >>> k = Tensor(np.random.randn(b, t, c).astype(np.float32))
9518        >>> v = Tensor(np.random.randn(b, t, c).astype(np.float32))
9519        >>> sp = Tensor(np.random.randn(b, c).astype(np.float32))
9520        >>> sq = Tensor(np.random.randn(b, c).astype(np.float32))
9521        >>> sm = Tensor(np.random.randn(b, c).astype(np.float32))
        >>> wkv = nn_ops.WKV()
        >>> output = wkv(w, u, k, v, sp, sq, sm)
9524        >>> print(output[0].shape)
9525        (32, 2, 128)
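        >>> # WKV returns four tensors: the attention output plus the updated states
        >>> # out_sp, out_sq and out_sm, matching the four outputs declared by the primitive.
        >>> print(len(output))
        4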
9526    """
9527
9528    @prim_attr_register
9529    def __init__(self):
9530        """Initialize WKV."""
9531        self.init_prim_io_names(inputs=["time_first", "time_decay", "key", "value", "sp", "sq", "sm"],
9532                                outputs=["output", "out_sp", "out_sq", "out_sm"])
9533
9534
9535class PromptFlashAttention(Primitive):
9536    r"""
    The interface for full inference.

    B -- Batch size

    S -- Sequence length

    H -- Hidden size
9544
9545    .. warning::
9546        This is an experimental API that is subject to change or deletion.
9547
9548    Args:
9549        num_heads (int): The number of heads.
9550        scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
9551          Muls in the calculation. Default: 1.0.
        pre_tokens (int): The number of previous tokens that can be attended to. Default: 2147483647.
        next_tokens (int): The number of following tokens that can be attended to; it controls how many data
          blocks of the upper triangle are involved in the calculation. The value 0 indicates that the data
          blocks in the upper triangle are not involved in the calculation. Default: 0.
        input_layout (str): The data layout of the input qkv, supports ``BSH`` and ``BNSD``. Default: ``BSH``.
        num_key_value_heads (int): Head number of key/value used in the GQA algorithm.
          The value 0 means that key and value have the same head number as `num_heads`. Default: 0.
        sparse_mode (int): The sparse mode. Default: 0.
        inner_precise (int): 0 for float16 high precision, 1 for high performance. Default: 1.
9561
9562    Inputs:
        - **query** (Tensor) - The query tensor with data type of float16 or float32.
          Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
        - **key** (Tensor) - The key tensor with data type of float16 or float32.
          Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
        - **value** (Tensor) - The value tensor with data type of float16 or float32.
          Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
        - **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or float32.
          For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
        - **actual_seq_lengths** (Tensor) - Describes the actual sequence length of each input, with data type of int64.
        - **actual_seq_lengths_kv** (Tensor) - Describes the actual sequence length of each key/value input,
          with data type of int64.
        - **pse_shift** (Tensor) - The position encoding tensor with data type of float16 or float32.
        - **deq_scale1** (Tensor) - Quantization parameter.
        - **quant_scale1** (Tensor) - Quantization parameter.
        - **deq_scale2** (Tensor) - Quantization parameter.
        - **quant_scale2** (Tensor) - Quantization parameter.
        - **quant_offset2** (Tensor) - Quantization parameter.
9579
9580    Outputs:
        - **attention_out** (Tensor) - Output tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
9582
9583    Supported Platforms:
9584        ``Ascend``
9585
9586    Examples:
9587        >>> import mindspore.ops.operations.nn_ops as P
9588        >>> from mindspore import Tensor
9589        >>> import numpy as np
9590        >>> B = 1
9591        >>> N = 16
9592        >>> S = 256
9593        >>> D = 16
9594        >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
9595        >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
9596        >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
9597        >>> attn_mask = Tensor(np.ones((B, 1, S, S), dtype=np.float16))
9598        >>> pfa = P.PromptFlashAttention(N, input_layout='BNSD')
9599        >>> out = pfa(query, key, value, attn_mask, None, None, None, None, None, None, None, None)
9600        >>> print(out.shape)
9601        (1, 16, 256, 16)
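        >>> # With the default ``input_layout='BSH'`` the same data is passed as (B, S, H), where
        >>> # H = N * D; the expected shape below follows the documented (B, S, H) output convention
        >>> # (an illustrative sketch).
        >>> H = N * D
        >>> query_bsh = Tensor(np.ones((B, S, H), dtype=np.float16))
        >>> key_bsh = Tensor(np.ones((B, S, H), dtype=np.float16))
        >>> value_bsh = Tensor(np.ones((B, S, H), dtype=np.float16))
        >>> pfa_bsh = P.PromptFlashAttention(N)
        >>> out_bsh = pfa_bsh(query_bsh, key_bsh, value_bsh, attn_mask, None, None, None, None, None, None, None, None)
        >>> print(out_bsh.shape)
        (1, 256, 256)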
9602    """
9603
9604    @prim_attr_register
    def __init__(self, num_heads, scale_value=1.0, pre_tokens=2147483647, next_tokens=0, input_layout='BSH',
9606                 num_key_value_heads=0, sparse_mode=0, inner_precise=1):
9607        """Initialize PromptFlashAttention."""
9608        validator.check_value_type('num_heads', num_heads, [int], self.name)
9609        validator.check_value_type('scale_value', scale_value, [float], self.name)
9610        validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
9611        validator.check_value_type('next_tokens', next_tokens, [int], self.name)
9612        validator.check_value_type('input_layout', input_layout, [str], self.name)
9613        validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name)
9614        validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
9615        validator.check_value_type('inner_precise', inner_precise, [int], self.name)
9616        self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "actual_seq_lengths",
9617                                        "actual_seq_lengths_kv", "pse_shift", "deq_scale1", "quant_scale1",
9618                                        "deq_scale2", "quant_scale2", "quant_offset2"],
9619                                outputs=["attention_out"])
9620
9621
9622class IncreFlashAttention(Primitive):
9623    r"""
    The interface for incremental inference.
9625
9626    B -- Batch size
9627
9628    S -- Sequence length
9629
9630    H -- Hidden size
9631
9632    .. warning::
9633        This is an experimental API that is subject to change or deletion.
        Optional inputs have no default values; ``None`` must be passed explicitly for any input that is not used.
9635
9636    Args:
        num_heads (int): The number of heads.
        input_layout (str): The data layout of the input qkv, supports ``BSH`` and ``BNSD``. Default: ``BSH``.
        scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
            Muls in the calculation. Default: 1.0.
        num_key_value_heads (int): Head number of key/value used in the GQA algorithm.
            The value 0 means that key and value have the same head number as `num_heads`. Default: 0.
        block_size (int): Default: 0.
        inner_precise (int): Default: 1.
9645
9646    Inputs:
9647        - **query** (Tensor) - The query tensor with data type of float16 or bfloat16.
9648          Input tensor of shape :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.
9649        - **key** (TensorList) - The key tensor with data type of float16 or bfloat16.
9650          Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
9651        - **value** (TensorList) - The value tensor with data type of float16 or bfloat16.
9652          Input tensor of shape :math:`(B, S, H)` / :math:`(B, N, S, D)`.
9653        - **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or bool.
9654          Input tensor of shape :math:`(B, S)` / :math:`(B, 1, S)` / :math:`(B, 1, 1, S)`.
        - **actual_seq_lengths** (Tensor) - Describes the actual sequence length of each input, with data type of int.
        - **pse_shift** (Tensor) - The position encoding tensor with data type of float16 or float32.
        - **dequant_scale1** (Tensor) - Quantization parameter, the tensor with data type of uint64.
        - **quant_scale1** (Tensor) - Quantization parameter, the tensor with data type of float.
        - **dequant_scale2** (Tensor) - Quantization parameter, the tensor with data type of uint64.
        - **quant_scale2** (Tensor) - Quantization parameter, the tensor with data type of float.
        - **quant_offset2** (Tensor) - Quantization parameter, the tensor with data type of float.
        - **antiquant_scale** (Tensor) - Quantization parameter, the tensor with data type of float.
        - **antiquant_offset** (Tensor) - Quantization parameter, the tensor with data type of float.
9664        - **block_table** (Tensor) - The tensor with data type of float.
9665
9666    Outputs:
        - **attention_out** (Tensor) - Output tensor of shape :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.
9668
9669    Supported Platforms:
9670        ``Ascend``
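
    Examples:
        >>> # A minimal illustrative sketch: the shapes are hypothetical, ``key`` and ``value`` are passed
        >>> # as single-element TensorLists, and every optional input is passed as ``None`` as required by
        >>> # the warning above; the expected shape follows the documented output shape.
        >>> import numpy as np
        >>> from mindspore import Tensor
        >>> import mindspore.ops.operations.nn_ops as P
        >>> B, N, S, D = 1, 16, 256, 16
        >>> H = N * D
        >>> query = Tensor(np.ones((B, 1, H), dtype=np.float16))
        >>> key = [Tensor(np.ones((B, S, H), dtype=np.float16))]
        >>> value = [Tensor(np.ones((B, S, H), dtype=np.float16))]
        >>> ifa = P.IncreFlashAttention(num_heads=N)
        >>> out = ifa(query, key, value, None, None, None, None, None, None, None, None, None, None, None)
        >>> print(out.shape)
        (1, 1, 256)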
9671    """
9672
9673    @prim_attr_register
9674    def __init__(self, num_heads, input_layout="BSH", scale_value=1.0, num_key_value_heads=0, block_size=0,
9675                 inner_precise=1):
9676        """Initialize IncreFlashAttention."""
9677        validator.check_value_type('num_heads', num_heads, [int], self.name)
9678        validator.check_value_type('input_layout', input_layout, [str], self.name)
9679        validator.check_value_type('scale_value', scale_value, [float], self.name)
9680        validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name)
9681        validator.check_value_type('block_size', block_size, [int], self.name)
9682        validator.check_value_type('inner_precise', inner_precise, [int], self.name)
9683        self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "actual_seq_lengths", "pse_shift",
9684                                        "dequant_scale1", "quant_scale1", "dequant_scale2", "quant_scale2",
9685                                        "quant_offset2", "antiquant_scale", "antiquant_offset", "block_table"],
9686                                outputs=["attention_out"])
9687
9688
9689class AllFinite(Primitive):
9690    r"""
    Check whether all gradients are finite.
9692    """
9693    @prim_attr_register
9694    def __init__(self):
        """Initialize AllFinite."""
9696        self.init_prim_io_names(inputs=['gradients'],
9697                                outputs=["is_finite"])
9698