1# Copyright 2020-2021 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ============================================================================
15"""conv"""
16import numpy as np
17from mindspore import log as logger
18from mindspore import context
19from mindspore.ops import operations as P
20from mindspore.ops.primitive import constexpr
21from mindspore.common.parameter import Parameter
22from mindspore.common.initializer import initializer
23from mindspore.common.tensor import Tensor
24from mindspore._checkparam import Validator, Rel, twice, _check_3d_int_or_tuple
25from mindspore._extends import cell_attr_register
26from ..cell import Cell
27
# Public convolution layers exported by this module.
__all__ = ['Conv2d', 'Conv2dTranspose', 'Conv1d', 'Conv1dTranspose', 'Conv3d', 'Conv3dTranspose']
29
30
class _Conv(Cell):
    """
    Applies a N-D convolution over an input signal composed of several input planes.

    Common base class of the convolution layers in this module. It validates the
    constructor arguments, derives the weight shape (regular vs. transposed,
    NCHW/NHWC/NCDHW layouts) and creates the `weight` parameter and the optional
    `bias` parameter. Subclasses must override `construct`.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 pad_mode,
                 padding,
                 dilation,
                 group,
                 has_bias,
                 weight_init,
                 bias_init,
                 data_format='NCHW',
                 transposed=False):
        """Initialize _Conv."""
        super(_Conv, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels, 'in_channels', self.cls_name)
        self.out_channels = Validator.check_positive_int(out_channels, 'out_channels', self.cls_name)
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad_mode = pad_mode
        self.weight_init = weight_init
        self.bias_init = bias_init
        self.format = Validator.check_string(data_format, ['NCHW', 'NHWC', 'NCDHW'], 'format', self.cls_name)
        # NHWC is a GPU-only layout; NCDHW (3D conv) is not available on CPU.
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError(f"For '{self.cls_name}', the \"NHWC\" format only support in GPU target, "
                             f"but got the format is {self.format} and "
                             f"the platform is {context.get_context('device_target')}.")
        if context.get_context("device_target") == "CPU" and self.format == "NCDHW":
            raise ValueError(f"For '{self.cls_name}', the \"NCDHW\" format only support in Ascend and GPU target, "
                             f"but got the format is {self.format} and "
                             f"the platform is {context.get_context('device_target')}.")
        if isinstance(padding, int):
            Validator.check_non_negative_int(padding, 'padding', self.cls_name)
            self.padding = padding
        elif isinstance(padding, tuple):
            for pad in padding:
                Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
            self.padding = padding
        else:
            raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int or tuple(int), "
                            f"but got {type(padding).__name__}.")

        self.dilation = dilation
        # Fix: pass the argument name and class name so a failing check reports
        # "group" and the layer class, consistent with every other validator
        # call in this constructor (previously only `group` was passed).
        self.group = Validator.check_positive_int(group, 'group', self.cls_name)
        self.has_bias = has_bias
        # Subclasses normalize kernel_size/stride/dilation to tuples before
        # calling this constructor, so element-wise validation is safe here.
        for kernel_size_elem in kernel_size:
            Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name)
        for stride_elem in stride:
            Validator.check_positive_int(stride_elem, 'stride item', self.cls_name)
        for dilation_elem in dilation:
            Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name)
        if in_channels % group != 0:
            raise ValueError(f"For '{self.cls_name}', the attr 'in_channels' must be divisible by attr 'group', "
                             f"but got 'in_channels': {in_channels} and 'group': {group}.")
        if out_channels % group != 0:
            raise ValueError(f"For '{self.cls_name}', the 'out_channels' must be divisible by attr 'group', "
                             f"but got 'out_channels': {out_channels} and 'group': {group}.")
        # Weight layout: transposed convs swap the channel axes; NHWC moves the
        # reduced input-channel axis to the end.
        if transposed:
            shape = [in_channels, out_channels // group, *kernel_size]
        else:
            shape = [out_channels, *kernel_size, in_channels // group] if self.format == "NHWC" else \
                [out_channels, in_channels // group, *kernel_size]
        self.weight = Parameter(initializer(self.weight_init, shape), name='weight')

        if Validator.check_bool(has_bias, "has_bias", self.cls_name):
            self.bias = Parameter(initializer(self.bias_init, [out_channels]), name='bias')
        else:
            # A non-default bias_init with has_bias=False is almost certainly a
            # user mistake; warn instead of failing.
            if self.bias_init != 'zeros':
                logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
            self.bias = None

    def construct(self, *inputs):
        """Must be overridden by all subclasses."""
        raise NotImplementedError
111
112
class Conv2d(_Conv):
    r"""
    2D convolution layer.

    Applies a 2D convolution over an input tensor, typically of shape
    :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is the batch size,
    :math:`C_{in}` the number of channels and :math:`H_{in}, W_{in}` the height
    and width. For each batch of shape :math:`(C_{in}, H_{in}, W_{in})` the
    output is:

    .. math::

        out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,

    where :math:`ccor` is the cross-correlation operator, :math:`C_{in}` is the input channel number,
    :math:`j` ranges from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the
    :math:`i`-th channel of the :math:`j`-th filter and :math:`out_{j}` corresponds to the
    :math:`j`-th channel of the output. Each :math:`W_{ij}` is a kernel slice of shape
    :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`; the full kernel has shape
    :math:`(C_{out}, C_{in} // \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
    where `group` is the number of groups the input `x` is split into along the
    channel dimension.

    With `pad_mode` set to "valid", the output height and width are
    :math:`\left \lfloor{1 + \frac{H_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
    (\text{kernel_size[0]} - 1) \times (\text{dilation[0]} - 1) }{\text{stride[0]}}} \right \rfloor` and
    :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
    (\text{kernel_size[1]} - 1) \times (\text{dilation[1]} - 1) }{\text{stride[1]}}} \right \rfloor` respectively.

    Convolution layers were first introduced in `Gradient Based Learning Applied to Document
    Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple[int]]): Height and width of the 2D convolution window.
            A single int applies to both dimensions; a tuple of 2 ints gives
            (height, width) of the kernel.
        stride (Union[int, tuple[int]]): Moving distance of the kernel. A single int
            applies to both the height and width of movement; a tuple of 2 ints
            gives them separately. Default: 1.
        pad_mode (str): Padding mode, one of "same", "valid", "pad". Default: "same".

            - same: The output height and width match the input `x`. The required
              total padding is split as evenly as possible between top/bottom and
              left/right; any odd remainder goes to the bottom and the right.
              `padding` must be 0 in this mode.

            - valid: No padding is applied; the largest possible output is
              returned and extra pixels are discarded. `padding` must be 0 in
              this mode.

            - pad: Implicit paddings on both sides of the input `x`; `padding`
              rows/columns are added to the tensor borders. `padding` must be
              greater than or equal to 0.

        padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input `x`.
            A single int pads top, bottom, left and right equally; a tuple of 4
            ints gives (top, bottom, left, right) as padding[0]..padding[3].
            Default: 0.
        dilation (Union[int, tuple[int]]): Dilation rate of the convolution, an int or a
            tuple of 2 ints. A value :math:`k > 1` skips :math:`k - 1` pixels per
            sampling location. Must be at least 1 and bounded by the height and
            width of the input `x`. Default: 1.
        group (int): Number of groups the filter is split into; `in_channels` and
            `out_channels` must both be divisible by it. When it equals both
            `in_channels` and `out_channels`, the layer acts as a 2D depthwise
            convolution. Default: 1.
        has_bias (bool): Whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            convolution kernel: a Tensor, a string, an Initializer or a number.
            String values include 'TruncatedNormal', 'Normal', 'Uniform',
            'HeUniform' and 'XavierUniform' as well as 'One' and 'Zero';
            aliases 'xavier_uniform', 'he_uniform', 'ones' and 'zeros' are
            accepted, case-insensitively. See Initializer for details.
            Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            bias vector; same possible values as 'weight_init'. See Initializer
            for details. Default: 'zeros'.
        data_format (str): Data format, either 'NHWC' or 'NCHW'. Default: 'NCHW'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` \
          or :math:`(N, H_{in}, W_{in}, C_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 4.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).
        ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.Conv2d(120, 240, 4, has_bias=False, weight_init='normal')
        >>> x = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32)
        >>> output = net(x).shape
        >>> print(output)
        (1, 240, 1024, 640)
    """

    @cell_attr_register
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 data_format='NCHW'):
        """Initialize Conv2d."""
        # Keep the raw (scalar-or-tuple) dilation around, then normalize every
        # spatial argument to a 2-tuple for the parent class and the primitive.
        self._dilation = dilation
        kernel_size = twice(kernel_size)
        stride = twice(stride)
        dilation = twice(dilation)
        super(Conv2d, self).__init__(in_channels,
                                     out_channels,
                                     kernel_size,
                                     stride,
                                     pad_mode,
                                     padding,
                                     dilation,
                                     group,
                                     has_bias,
                                     weight_init,
                                     bias_init,
                                     data_format)
        # The parent validated/stored everything; configure the primitives from
        # the validated attributes rather than the raw arguments.
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group,
                               data_format=self.format)
        self.bias_add = P.BiasAdd(data_format=self.format)

    def construct(self, x):
        out = self.conv2d(x, self.weight)
        if not self.has_bias:
            return out
        return self.bias_add(out, self.bias)

    def extend_repr(self):
        """Return the expanded string representation of this layer's configuration."""
        return (f'input_channels={self.in_channels}, output_channels={self.out_channels}, '
                f'kernel_size={self.kernel_size}, '
                f'stride={self.stride}, pad_mode={self.pad_mode}, padding={self.padding}, '
                f'dilation={self.dilation}, '
                f'group={self.group}, has_bias={self.has_bias}, '
                f'weight_init={self.weight_init}, bias_init={self.bias_init}, format={self.format}')
289
290
@constexpr
def _check_input_3d(input_shape, op_name):
    """Raise ValueError unless `input_shape` has exactly three dimensions."""
    rank = len(input_shape)
    if rank != 3:
        raise ValueError(f"For '{op_name}', the dimension of input should be 3d, but got {rank}.")
295
296
class Conv1d(_Conv):
    r"""
    1D convolution layer.

    Applies a 1D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, W_{in})`,
    where :math:`N` is batch size and :math:`C_{in}` is channel number. For each batch of shape
    :math:`(C_{in}, W_{in})`, the formula is defined as:

    .. math::

        out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,

    where :math:`ccor` is the cross correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
    from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
    of kernel and it has shape :math:`(\text{ks_w})`, where :math:`\text{ks_w}` is the width of the convolution kernel.
    The full kernel has shape :math:`(C_{out}, C_{in} // \text{group}, \text{ks_w})`, where group is the group number
    to split the input `x` in the channel dimension.

    If the 'pad_mode' is set to be "valid", the output width will be
    :math:`\left \lfloor{1 + \frac{W_{in} + 2 \times \text{padding} - \text{ks_w} -
    (\text{ks_w} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor`    respectively.

    The first introduction of convolution layer can be found in paper `Gradient Based Learning Applied to Document
    Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (int): The data type is int. Specifies the
            width of the 1D convolution window.
        stride (int): The distance of kernel moving, an int number that represents
            the width of movement. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are
            "same", "valid", "pad". Default: "same".

            - same: Adopts the way of completion. The output width will be the same as the input `x`.
              The total number of padding will be calculated in the horizontal
              direction and evenly distributed to left and right if possible. Otherwise, the
              last extra padding will be done from the bottom and the right side. If this mode is set, `padding`
              must be 0.

            - valid: Adopts the way of discarding. The possible largest width of the output will be returned
              without padding. Extra pixels will be discarded. If this mode is set, `padding`
              must be 0.

            - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the input
              Tensor borders. `padding` must be greater than or equal to 0.

        padding (int): Implicit paddings on both sides of the input `x`. Default: 0.
        dilation (int): The data type is int. Specifies the dilation rate
                                      to use for dilated convolution. If set to be :math:`k > 1`, there will
                                      be :math:`k - 1` pixels skipped for each sampling location. Its value must
                                      be greater or equal to 1 and bounded by the height and width of the
                                      input `x`. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): An initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, W_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.Conv1d(120, 240, 4, has_bias=False, weight_init='normal')
        >>> x = Tensor(np.ones([1, 120, 640]), mindspore.float32)
        >>> output = net(x).shape
        >>> print(output)
        (1, 240, 640)
    """

    @cell_attr_register
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        """Initialize Conv1d."""
        # 1D arguments must be plain ints (no tuples), validated before they
        # are lifted to 2D below.
        Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', self.cls_name)
        Validator.check_int(stride, 1, Rel.GE, 'stride', self.cls_name)
        Validator.check_non_negative_int(padding, 'padding', self.cls_name)
        Validator.check_int(dilation, 1, Rel.GE, 'dilation', self.cls_name)
        # Conv1d is implemented on top of the Conv2D primitive: every spatial
        # argument gains a dummy height dimension of size 1.
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        if isinstance(weight_init, Tensor):
            # An explicit 3-D weight tensor (C_out, C_in/group, ks_w) must be
            # expanded to 4-D to match the lifted kernel shape *before* the
            # parent constructor consumes it — do not reorder this block.
            weight_init_shape = get_shape(weight_init)
            Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)

        super(Conv1d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init)
        # Deliberately overwrite the scalar padding stored by the parent:
        # only the width axis is padded; the dummy height axis stays at 0.
        self.padding = (0, 0, padding, padding)
        Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group)
        self.bias_add = P.BiasAdd()
        # Used in construct to move between 3-D inputs and the 4-D Conv2D view.
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
        self.shape = P.Shape()

    def construct(self, x):
        # Validate rank, insert the dummy height axis, run the 2D convolution,
        # then squeeze the dummy axis back out.
        x_shape = self.shape(x)
        _check_input_3d(x_shape, self.cls_name)
        x = self.expand_dims(x, 2)
        output = self.conv2d(x, self.weight)
        if self.has_bias:
            output = self.bias_add(output, self.bias)

        output = self.squeeze(output)
        return output

    def extend_repr(self):
        # Expanded string representation of this layer's configuration.
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}'.format(
                self.in_channels,
                self.out_channels,
                self.kernel_size,
                self.stride,
                self.pad_mode,
                self.padding,
                self.dilation,
                self.group,
                self.has_bias,
                self.weight_init,
                self.bias_init)
        return s
478
479
@constexpr
def _check_input_5dims(input_shape, op_name):
    """Raise ValueError unless `input_shape` has exactly five dimensions."""
    rank = len(input_shape)
    if rank != 5:
        raise ValueError(f"For '{op_name}', the dimension of input should be 5d, but got {rank}.")
484
485
486class Conv3d(_Conv):
487    r"""
488    3D convolution layer.
489
490    Applies a 3D convolution over an input tensor which is typically of shape
491    :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and output shape
492    :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`. where :math:`N` is batch size. :math:`C` is channel number.
493    the formula is defined as:
494
495    .. math::
496
497        \operatorname{out}\left(N_{i}, C_{\text {out}_j}\right)=\operatorname{bias}\left(C_{\text {out}_j}\right)+
498        \sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right),
499        \operatorname{input}\left(N_{i}, k\right))
500
501    where :math:`ccor` is the cross-correlation operator.
502
503    If the 'pad_mode' is set to be "valid", the output depth, height and width will be
504    :math:`\left \lfloor{1 + \frac{D_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
505    (\text{kernel_size[0]} - 1) \times (\text{dilation[0]} - 1) }{\text{stride[0]}}} \right \rfloor` and
506    :math:`\left \lfloor{1 + \frac{H_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
507    (\text{kernel_size[1]} - 1) \times (\text{dilation[1]} - 1) }{\text{stride[1]}}} \right \rfloor` and
508    :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[4]} + \text{padding[5]} - \text{kernel_size[2]} -
509    (\text{kernel_size[2]} - 1) \times (\text{dilation[2]} - 1) }{\text{stride[2]}}} \right \rfloor` respectively.
510
511    Args:
512        in_channels (int): The number of input channel :math:`C_{in}`.
513        out_channels (int): The number of output channel :math:`C_{out}`.
514        kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers.
515            Specifies the depth, height and width of the 3D convolution window.
516            Single int means the value is for the depth, height and the width of the kernel.
517            A tuple of 3 ints means the first value is for the depth, second value is for height and the
518            other is for the width of the kernel.
519        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
520            the depth, height and width of movement are both strides, or a tuple of three int numbers that
521            represent depth, height and width of movement respectively. Default: 1.
522        pad_mode (str): Specifies padding mode. The optional values are
523            "same", "valid", "pad". Default: "same".
524
525            - same: Adopts the way of completion. The depth, height and width of the output will be the same as
526              the input `x`. The total number of padding will be calculated in depth, horizontal and vertical
527              directions and evenly distributed to head and tail, top and bottom, left and right if possible.
528              Otherwise, the last extra padding will be done from the tail, bottom and the right side.
529              If this mode is set, `padding` must be 0.
530
531            - valid: Adopts the way of discarding. The possible largest depth, height and width of output
532              will be returned without padding. Extra pixels will be discarded. If this mode is set, `padding`
533              must be 0.
534
535            - pad: Implicit paddings on both sides of the input `x` in depth, height, width. The number of `padding`
536              will be padded to the input Tensor borders. `padding` must be greater than or equal to 0.
537
538        padding (Union(int, tuple[int])): Implicit paddings on both sides of the input `x`.
539            The data type is int or a tuple of 6 integers. Default: 0. If `padding` is an integer,
540            the paddings of head, tail, top, bottom, left and right are the same, equal to padding.
541            If `paddings` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
542            padding[0], padding[1], padding[2], padding[3], padding[4] and padding[5] correspondingly.
543        dilation (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers
544            : math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1.
545            Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
546            there will be :math:`k - 1` pixels skipped for each sampling location.
547            Its value must be greater or equal to 1 and bounded by the height and width of the input `x`. Default: 1.
548        group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
549            divisible by the number of groups. Default: 1. Only 1 is currently supported.
550        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
551        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
552            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
553            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
554            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
555            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
556            Initializer for more details. Default: 'normal'.
557        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
558            Initializer and string are the same as 'weight_init'. Refer to the values of
559            Initializer for more details. Default: 'zeros'.
560        data_format (str): The optional value for data format. Currently only support "NCDHW".
561
562    Inputs:
563        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
564          Currently input data type only support float16 and float32.
565
566    Outputs:
567        Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
568
569    Raises:
570        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
571        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
572        ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
573        ValueError: If `padding` is less than 0.
574        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
575        ValueError: If `padding` is a tuple whose length is not equal to 6.
576        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0).
577        ValueError: If `data_format` is not 'NCDHW'.
578
579    Supported Platforms:
580        ``Ascend`` ``GPU``
581
582    Examples:
583        >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float32)
584        >>> conv3d = nn.Conv3d(in_channels=3, out_channels=32, kernel_size=(4, 3, 3))
585        >>> output = conv3d(x)
586        >>> print(output.shape)
587        (16, 32, 10, 32, 32)
588    """
589
    @cell_attr_register
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 data_format='NCDHW'):
        """Initialize Conv3d.

        Normalizes ``kernel_size``, ``stride`` and ``dilation`` to 3-tuples,
        validates ``padding``, then builds the underlying ``Conv3D`` primitive
        plus the ops used by ``construct``.
        """
        # Scalars become (depth, height, width) 3-tuples; tuples are validated in place.
        kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name)
        stride = _check_3d_int_or_tuple("stride", stride, self.cls_name)
        dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name)
        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
        # A tuple padding must list all six pads: (head, tail, top, bottom, left, right).
        if isinstance(padding, tuple):
            Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name)
        # _Conv stores the validated hyper-parameters as attributes
        # (self.kernel_size, self.pad_mode, ...); construct relies on the
        # base class also providing self.weight/self.bias — see _Conv.
        super(Conv3d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            data_format)
        # mode=1 selects the regular cross-correlation convolution mode of the primitive.
        self.conv3d = P.Conv3D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group,
                               data_format=self.format)
        self.bias_add = P.BiasAdd(data_format=self.format)
        self.shape = P.Shape()
635
636    def construct(self, x):
637        x_shape = self.shape(x)
638        _check_input_5dims(x_shape, self.cls_name)
639        output = self.conv3d(x, self.weight)
640        if self.has_bias:
641            output = self.bias_add(output, self.bias)
642        return output
643
644    def extend_repr(self):
645        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
646            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
647            'group={}, has_bias={}, ' \
648            'weight_init={}, bias_init={}, format={}'.format(
649                self.in_channels,
650                self.out_channels,
651                self.kernel_size,
652                self.stride,
653                self.pad_mode,
654                self.padding,
655                self.dilation,
656                self.group,
657                self.has_bias,
658                self.weight_init,
659                self.bias_init,
660                self.format)
661        return s
662
663
class Conv3dTranspose(_Conv):
    r"""
    Compute a 3D transposed convolution, which is also known as a deconvolution
    (although it is not an actual deconvolution).
    The transposed convolution operator multiplies each input value element-wise by a learnable kernel,
    and sums over the outputs from all input feature planes.
    This module can be seen as the gradient of Conv3d with respect to its input.

    `x` is typically of shape :math:`(N, C, D, H, W)`, where :math:`N` is batch size, :math:`C` is channel number,
    :math:`D` is the characteristic depth, :math:`H` is the height of the characteristic layer,
    and :math:`W` is the width of the characteristic layer.
    The calculation process of transposed convolution is equivalent to the reverse calculation of convolution.

    The pad_mode argument effectively adds :math:`dilation * (kernel\_size - 1) - padding` amount of zero padding
    to both sides of the input. So that when a Conv3d and a ConvTranspose3d are initialized with same parameters,
    they are inverses of each other in regard to the input and output shapes.
    However, when stride > 1, Conv3d maps multiple input shapes to the same output shape.
    ConvTranspose3d provides a padding argument to increase the calculated output shape on one or more sides.

    The height and width of output are defined as:

    if the 'pad_mode' is set to be "pad",

    .. math::
        D_{out} = (D_{in} - 1) \times \text{stride_d} - 2 \times \text{padding_d} + \text{dilation_d} \times
        (\text{kernel_size_d} - 1) + \text{output_padding_d} + 1

        H_{out} = (H_{in} - 1) \times \text{stride_h} - 2 \times \text{padding_h} + \text{dilation_h} \times
        (\text{kernel_size_h} - 1) + \text{output_padding_h} + 1

        W_{out} = (W_{in} - 1) \times \text{stride_w} - 2 \times \text{padding_w} + \text{dilation_w} \times
        (\text{kernel_size_w} - 1) + \text{output_padding_w} + 1

    if the 'pad_mode' is set to be "same",

    .. math::

        D_{out} = (D_{in} + \text{stride_d} - 1)/\text{stride_d} \\
        H_{out} = (H_{in} + \text{stride_h} - 1)/\text{stride_h} \\
        W_{out} = (W_{in} + \text{stride_w} - 1)/\text{stride_w}

    if the 'pad_mode' is set to be "valid",

    .. math::

        D_{out} = (D_{in} - 1) \times \text{stride_d} + \text{dilation_d} \times
        (\text{kernel_size_d} - 1) + 1 \\
        H_{out} = (H_{in} - 1) \times \text{stride_h} + \text{dilation_h} \times
        (\text{kernel_size_h} - 1) + 1 \\
        W_{out} = (W_{in} - 1) \times \text{stride_w} + \text{dilation_w} \times
        (\text{kernel_size_w} - 1) + 1

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple[int]]): The kernel size of the 3D convolution.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the depth, height and width of movement are both strides, or a tuple of three int numbers that
            represent depth, height and width of movement respectively. Its value must be equal to or greater than 1.
            Default: 1.
        pad_mode (str): Select the mode of the pad. The optional values are
            "pad", "same", "valid". Default: "same".

            - same: Adopts the way of completion. The depth, height and width of the output will be the same as
              the input `x`. The total number of padding will be calculated in depth, horizontal and vertical
              directions and evenly distributed to head and tail, top and bottom, left and right if possible.
              Otherwise, the last extra padding will be done from the tail, bottom and the right side.
              If this mode is set, `padding` and `output_padding` must be 0.

            - valid: Adopts the way of discarding. The possible largest depth, height and width of output
              will be returned without padding. Extra pixels will be discarded. If this mode is set, `padding`
              and `output_padding` must be 0.

            - pad: Implicit paddings on both sides of the input `x` in depth, height, width. The number of `pad` will
              be padded to the input Tensor borders. `padding` must be greater than or equal to 0.

        padding (Union(int, tuple[int])): The pad value to be filled. Default: 0. If `padding` is an integer,
            the paddings of head, tail, top, bottom, left and right are the same, equal to padding.
            If `padding` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
            padding[0], padding[1], padding[2], padding[3], padding[4] and padding[5] correspondingly.
        dilation (Union(int, tuple[int])): The data type is int or a tuple of 3 integers
            :math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1.
            Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
            there will be :math:`k - 1` pixels skipped for each sampling location.
            Its value must be greater or equal to 1 and bounded by the height and width of the input `x`. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. Default: 1. Only 1 is currently supported.
        output_padding (Union(int, tuple[int])): Add extra size to each dimension of the output. Default: 0.
            Must be greater than or equal to 0.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.
        data_format (str): The optional value for data format. Currently only supports 'NCDHW'. Default: 'NCDHW'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
          Currently input data type only support float16 and float32.

    Outputs:
        Tensor, the shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` , `dilation` or `output_padding`
                   is neither an int nor a tuple of three.
        TypeError: If input data type is not float16 or float32.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 6.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0).
        ValueError: If `data_format` is not 'NCDHW'.

    Examples:
        >>> x = Tensor(np.ones([32, 16, 10, 32, 32]), mindspore.float32)
        >>> conv3d_transpose = nn.Conv3dTranspose(in_channels=16, out_channels=3, kernel_size=(4, 6, 2),
        ...                                       pad_mode='pad')
        >>> output = conv3d_transpose(x)
        >>> print(output.shape)
        (32, 3, 13, 37, 33)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 output_padding=0,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 data_format='NCDHW'):
        """Initialize Conv3dTranspose."""
        # Scalars become (depth, height, width) 3-tuples; tuples are validated in place.
        kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name)
        stride = _check_3d_int_or_tuple("stride", stride, self.cls_name)
        dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name)
        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
        # A tuple padding must list all six pads: (head, tail, top, bottom, left, right).
        if isinstance(padding, tuple):
            Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name)
        # output_padding may legitimately be 0, hence greater_zero=False.
        output_padding = _check_3d_int_or_tuple("output_padding", output_padding, self.cls_name, greater_zero=False)
        # transposed=True tells _Conv to lay out the weight for a transposed convolution.
        super(Conv3dTranspose, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            data_format,
            transposed=True)
        # mode=1 selects the regular cross-correlation convolution mode of the primitive.
        self.conv3d_transpose = P.Conv3DTranspose(in_channel=self.in_channels,
                                                  out_channel=self.out_channels,
                                                  kernel_size=self.kernel_size,
                                                  mode=1,
                                                  pad_mode=self.pad_mode,
                                                  pad=self.padding,
                                                  stride=self.stride,
                                                  dilation=self.dilation,
                                                  group=self.group,
                                                  output_padding=output_padding,
                                                  data_format=self.format)
        self.bias_add = P.BiasAdd(data_format=self.format)
        self.shape = P.Shape()

    def construct(self, x):
        """Apply the 3D transposed convolution to `x`, adding the bias when configured."""
        x_shape = self.shape(x)
        _check_input_5dims(x_shape, self.cls_name)
        output = self.conv3d_transpose(x, self.weight)
        if self.has_bias:
            output = self.bias_add(output, self.bias)
        return output

    def extend_repr(self):
        """Return the settings summary shown by repr()."""
        # Field list kept consistent with Conv3d.extend_repr, including `format`,
        # since this layer also accepts a data_format argument.
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}, format={}'.format(self.in_channels,
                                                             self.out_channels,
                                                             self.kernel_size,
                                                             self.stride,
                                                             self.pad_mode,
                                                             self.padding,
                                                             self.dilation,
                                                             self.group,
                                                             self.has_bias,
                                                             self.weight_init,
                                                             self.bias_init,
                                                             self.format)
        return s
870
871
872def _deconv_output_length(is_valid, is_same, is_pad, input_length, filter_size, stride_size, dilation_size, padding):
873    """Calculate the width and height of output."""
874    length = 0
875    filter_size = filter_size + (filter_size - 1) * (dilation_size - 1)
876    if is_valid:
877        if filter_size - stride_size > 0:
878            length = input_length * stride_size + filter_size - stride_size
879        else:
880            length = input_length * stride_size
881    elif is_same:
882        length = input_length * stride_size
883    elif is_pad:
884        length = input_length * stride_size - padding + filter_size - stride_size
885
886    return length
887
888
class Conv2dTranspose(_Conv):
    r"""
    2D transposed convolution layer.

    Compute a 2D transposed convolution, which is also known as a deconvolution
    (although it is not an actual deconvolution).
    This module can be seen as the gradient of Conv2d with respect to its input.

    `x` is typically of shape :math:`(N, C, H, W)`, where :math:`N` is batch size, :math:`C` is channel number,
    :math:`H` is the height of the characteristic layer and :math:`W` is the width of the characteristic layer.

    The pad_mode argument effectively adds :math:`dilation * (kernel\_size - 1) - padding` amount of zero padding
    to both sides of the input. So that when a Conv2d and a ConvTranspose2d are initialized with same parameters,
    they are inverses of each other in regard to the input and output shapes.
    However, when stride > 1, Conv2d maps multiple input shapes to the same output shape.
    ConvTranspose2d provides a padding argument to increase the calculated output shape on one or more sides.

    The height and width of output are defined as:

    if the 'pad_mode' is set to be "pad",

    .. math::

        H_{out} = (H_{in} - 1) \times \text{stride[0]} - \left (\text{padding[0]} + \text{padding[1]}\right ) +
        \text{dilation[0]} \times (\text{kernel_size[0]} - 1) + 1

        W_{out} = (W_{in} - 1) \times \text{stride[1]} - \left (\text{padding[2]} + \text{padding[3]}\right ) +
        \text{dilation[1]} \times (\text{kernel_size[1]} - 1) + 1

    if the 'pad_mode' is set to be "same",

    .. math::

        H_{out} = (H_{in} + \text{stride[0]} - 1)/\text{stride[0]} \\
        W_{out} = (W_{in} + \text{stride[1]} - 1)/\text{stride[1]}

    if the 'pad_mode' is set to be "valid",

    .. math::

        H_{out} = (H_{in} - 1) \times \text{stride[0]} + \text{dilation[0]} \times
        (\text{ks_w[0]} - 1) + 1 \\
        W_{out} = (W_{in} - 1) \times \text{stride[1]} + \text{dilation[1]} \times
        (\text{ks_w[1]} - 1) + 1

    where :math:`\text{kernel_size[0]}` is the height of the convolution kernel and :math:`\text{kernel_size[1]}`
    is the width of the convolution kernel.

    Args:
        in_channels (int): The number of channels in the input space.
        out_channels (int): The number of channels in the output space.
        kernel_size (Union[int, tuple]): int or a tuple of 2 integers, which specifies the height
            and width of the 2D convolution window. Single int means the value is for both the height and the width of
            the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
            width of the kernel.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the height and width of movement are both strides, or a tuple of two int numbers that
            represent height and width of movement respectively. Its value must be equal to or greater than 1.
            Default: 1.
        pad_mode (str): Select the mode of the pad. The optional values are
            "pad", "same", "valid". Default: "same".

            - pad: Implicit paddings on both sides of the input `x`.

            - same: Adopted the way of completion.

            - valid: Adopted the way of discarding.
        padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input `x`. If `padding` is one integer,
                    the paddings of top, bottom, left and right are the same, equal to padding. If `padding` is a tuple
                    with four integers, the paddings of top, bottom, left and right will be equal to padding[0],
                    padding[1], padding[2], and padding[3] accordingly. Default: 0.
        dilation (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the dilation rate
                                      to use for dilated convolution. If set to be :math:`k > 1`, there will
                                      be :math:`k - 1` pixels skipped for each sampling location. Its value must
                                      be greater than or equal to 1 and bounded by the height and width of the
                                      input `x`. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. This is not supported on Davinci devices when group > 1. Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 4.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.Conv2dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
        >>> x = Tensor(np.ones([1, 3, 16, 50]), mindspore.float32)
        >>> output = net(x).shape
        >>> print(output)
        (1, 64, 19, 53)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        """Initialize Conv2dTranspose."""
        # Scalars become (height, width) pairs; tuples are validated in place.
        kernel_size = twice(kernel_size)
        stride = twice(stride)
        dilation = twice(dilation)
        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
        # A tuple padding must list all four pads: (top, bottom, left, right).
        if isinstance(padding, tuple):
            Validator.check_equal_int(len(padding), 4, 'padding size', self.cls_name)
        # out_channels and in_channels swap.
        # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # then Conv2dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
        super(Conv2dTranspose, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            transposed=True)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
        # Pre-computed pad-mode flags consumed by _deconv_output_length in construct.
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        # NOTE(review): _Conv likely already creates a bias parameter when has_bias
        # is set; this re-creation appears redundant — confirm against _Conv.__init__.
        if Validator.check_bool(has_bias, "has_bias", self.cls_name):
            self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')

        # cause Conv2DTranspose's out_channel refers to Conv2D's out_channel.
        self.conv2d_transpose = P.Conv2DTranspose(out_channel=in_channels,
                                                  kernel_size=kernel_size,
                                                  mode=1,
                                                  pad_mode=pad_mode,
                                                  pad=padding,
                                                  stride=stride,
                                                  dilation=dilation,
                                                  group=group)
        self.bias_add = P.BiasAdd()
        # Split padding into per-side amounts used for the output-size computation.
        if isinstance(self.padding, int):
            self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = (self.padding,) * 4
        else:
            self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = self.padding

    def shard(self, strategy):
        """Set the parallel shard strategy on the underlying primitive; returns self for chaining."""
        self.conv2d_transpose.shard(strategy)
        return self

    def construct(self, x):
        """Apply the 2D transposed convolution to `x`, adding the bias when configured."""
        n, _, h, w = self.shape(x)
        # The backprop-input primitive needs the explicit target output shape.
        h_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, h, self.kernel_size[0],
                                      self.stride[0], self.dilation[0], self.padding_top + self.padding_bottom)
        w_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, w, self.kernel_size[1],
                                      self.stride[1], self.dilation[1], self.padding_left + self.padding_right)
        if self.has_bias:
            return self.bias_add(self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out)),
                                 self.bias)
        return self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))

    def extend_repr(self):
        """Return the settings summary shown by repr()."""
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}'.format(self.in_channels,
                                                  self.out_channels,
                                                  self.kernel_size,
                                                  self.stride,
                                                  self.pad_mode,
                                                  self.padding,
                                                  self.dilation,
                                                  self.group,
                                                  self.has_bias,
                                                  self.weight_init,
                                                  self.bias_init)
        return s
1096
1097
1098class Conv1dTranspose(_Conv):
1099    r"""
1100    1D transposed convolution layer.
1101
1102    Compute a 1D transposed convolution, which is also known as a deconvolution
1103    (although it is not an actual deconvolution).
1104    This module can be seen as the gradient of Conv1d with respect to its input.
1105
1106    `x` is typically of shape :math:`(N, C, W)`, where :math:`N` is batch size, :math:`C` is channel number and
1107    :math:`W` is the characteristic length.
1108
1109    The padding argument effectively adds :math:`dilation * (kernel\_size - 1) - padding` amount of zero padding to
1110    both sizes of the input. So that when a Conv1d and a ConvTranspose1d are initialized with same parameters,
1111    they are inverses of each other in regard to the input and output shapes. However, when stride > 1,
1112    Conv1d maps multiple input shapes to the same output shape.
1113
1114    The width of output is defined as:
1115
1116    .. math::
1117
1118        W_{out} = \begin{cases}
1119        (W_{in} - 1) \times \text{stride} - 2 \times \text{padding} + \text{dilation} \times
1120        (\text{ks_w} - 1) + 1, & \text{if pad_mode='pad'}\\
1121        (W_{in} + \text{stride} - 1)/\text{stride}, & \text{if pad_mode='same'}\\
1122        (W_{in} - 1) \times \text{stride} + \text{dilation} \times
1123        (\text{ks_w} - 1) + 1, & \text{if pad_mode='valid'}
1124        \end{cases}
1125
1126    where :math:`\text{ks_w}` is the width of the convolution kernel.
1127
1128    Args:
1129        in_channels (int): The number of channels in the input space.
1130        out_channels (int): The number of channels in the output space.
1131        kernel_size (int): int, which specifies the width of the 1D convolution window.
1132        stride (int): The distance of kernel moving, an int number that represents
1133            the width of movement. Default: 1.
1134        pad_mode (str): Select the mode of the pad. The optional values are
1135            "pad", "same", "valid". Default: "same".
1136
1137            - pad: Implicit paddings on both sides of the input `x`.
1138
1139            - same: Adopted the way of completion.
1140
1141            - valid: Adopted the way of discarding.
1142        padding (int): Implicit paddings on both sides of the input `x`. Default: 0.
1143        dilation (int): The data type is int. Specifies the dilation rate
1144                                      to use for dilated convolution. If set to be :math:`k > 1`, there will
1145                                      be :math:`k - 1` pixels skipped for each sampling location. Its value must
1146                                      be greater or equal to 1 and bounded by the width of the
1147                                      input `x`. Default: 1.
1148        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. This is not supported on Davinci devices when group > 1. Default: 1.
1150        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
1151        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
1152            It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified,
1153            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
1154            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
1155            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
1156            Initializer for more details. Default: 'normal'.
1157        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
1158            Initializer and string are the same as 'weight_init'. Refer to the values of
1159            Initializer for more details. Default: 'zeros'.
1160
1161    Inputs:
1162        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, W_{in})`.
1163
1164    Outputs:
1165        Tensor of shape :math:`(N, C_{out}, W_{out})`.
1166
1167    Raises:
1168        TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int.
1169        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
1170        ValueError: If `padding` is less than 0.
1171        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
1172
1173    Supported Platforms:
1174        ``Ascend`` ``GPU`` ``CPU``
1175
1176    Examples:
1177        >>> net = nn.Conv1dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
1178        >>> x = Tensor(np.ones([1, 3, 50]), mindspore.float32)
1179        >>> output = net(x).shape
1180        >>> print(output)
1181        (1, 64, 53)
1182    """
1183
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        """Initialize Conv1dTranspose.

        The 1D transposed convolution is implemented by inserting a dummy
        height axis of size 1 and delegating to the 2D primitive
        ``Conv2DBackpropInput``; see the axis-lifting steps below.
        """
        # Validate the user-facing scalar (1D) hyper-parameters before they
        # are lifted to 2D tuples.
        Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', self.cls_name)
        Validator.check_int(stride, 1, Rel.GE, 'stride', self.cls_name)
        Validator.check_non_negative_int(padding, 'padding', self.cls_name)
        Validator.check_int(dilation, 1, Rel.GE, 'dilation', self.cls_name)
        # Lift the 1D hyper-parameters to 2D by prepending a size-1 height axis.
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        if isinstance(weight_init, Tensor):
            # A user-provided weight tensor must be 3D; insert the dummy
            # height axis (axis 2) so it matches the lifted 2D kernel shape.
            weight_init_shape = get_shape(weight_init)
            Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)
        # out_channels and in_channels swap.
        # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # then Conv1dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
        super(Conv1dTranspose, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            transposed=True)
        # 2D padding layout (top, bottom, left, right): the dummy height axis
        # gets no padding, the real width axis is padded symmetrically
        # (construct() pairs padding[0:2] with height and padding[2:4] with width).
        self.padding = (0, 0, padding, padding)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
        # Pre-computed pad-mode flags consumed by construct() when deriving
        # the deconvolution output size.
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        if Validator.check_bool(has_bias, "has_bias", self.cls_name):
            # NOTE(review): the _Conv base likely also initializes a bias when
            # has_bias is True; this re-creates the parameter here — confirm intended.
            self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')

        # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel.
        self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                      kernel_size=kernel_size,
                                                      mode=1,
                                                      pad_mode=pad_mode,
                                                      pad=self.padding,
                                                      stride=stride,
                                                      dilation=dilation,
                                                      group=group)
        self.bias_add = P.BiasAdd()
        self.expand_dims = P.ExpandDims()
        # Squeeze out the dummy height axis (axis 2) to return a 3D result.
        self.squeeze = P.Squeeze(2)
1256
1257    def shard(self, strategy):
1258        self.conv2d_transpose.shard(strategy)
1259        return self
1260
1261    def construct(self, x):
1262        x_shape = self.shape(x)
1263        _check_input_3d(x_shape, self.cls_name)
1264        x = self.expand_dims(x, 2)
1265
1266        n, _, h, w = self.shape(x)
1267
1268        h_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, h, self.kernel_size[0],
1269                                      self.stride[0], self.dilation[0], self.padding[0] + self.padding[1])
1270        w_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, w, self.kernel_size[1],
1271                                      self.stride[1], self.dilation[1], self.padding[2] + self.padding[3])
1272        output = self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))
1273        if self.has_bias:
1274            output = self.bias_add(output, self.bias)
1275
1276        output = self.squeeze(output)
1277        return output
1278
1279    def extend_repr(self):
1280        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
1281            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
1282            'group={}, has_bias={}, ' \
1283            'weight_init={}, bias_init={}'.format(self.in_channels,
1284                                                  self.out_channels,
1285                                                  self.kernel_size,
1286                                                  self.stride,
1287                                                  self.pad_mode,
1288                                                  self.padding,
1289                                                  self.dilation,
1290                                                  self.group,
1291                                                  self.has_bias,
1292                                                  self.weight_init,
1293                                                  self.bias_init)
1294        return s
1295