1# Copyright 2020-2021 Huawei Technologies Co., Ltd 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================ 15"""conv""" 16from __future__ import absolute_import 17 18import math 19import numpy as np 20 21from mindspore import context 22from mindspore.ops import operations as P 23import mindspore.common.dtype as mstype 24from mindspore.common.parameter import Parameter 25from mindspore.common.initializer import initializer, HeUniform, Uniform, _calculate_fan_in_and_fan_out 26from mindspore.common.tensor import Tensor 27from mindspore import _checkparam as Validator 28from mindspore._checkparam import twice, _check_3d_int_or_tuple 29from mindspore._extends import cell_attr_register 30from mindspore.nn.cell import Cell 31from mindspore.ops.primitive import _primexpr 32 33__all__ = ['Conv2d', 'Conv2dTranspose', 'Conv1d', 'Conv1dTranspose', 'Conv3d', 'Conv3dTranspose'] 34 35 36class _Conv(Cell): 37 """ 38 Applies a N-D convolution over an input signal composed of several input planes. 
39 """ 40 41 def __init__(self, 42 in_channels, 43 out_channels, 44 kernel_size, 45 stride, 46 pad_mode, 47 padding, 48 dilation, 49 group, 50 has_bias, 51 weight_init, 52 bias_init, 53 data_format='NCHW', 54 transposed=False, 55 dtype=mstype.float32): 56 """Initialize _Conv.""" 57 super(_Conv, self).__init__() 58 self.in_channels = Validator.check_positive_int(in_channels, 'in_channels', self.cls_name) 59 self.out_channels = Validator.check_positive_int(out_channels, 'out_channels', self.cls_name) 60 self.kernel_size = kernel_size 61 self.stride = stride 62 self.pad_mode = pad_mode 63 self.data_format = Validator.check_string(data_format, ['NCHW', 'NHWC', 'NCDHW'], 'format', self.cls_name) 64 if context.get_context("device_target") != "GPU" and self.data_format == "NHWC": 65 raise ValueError(f"For '{self.cls_name}', the \"NHWC\" format only support in GPU target, " 66 f"but got the 'format' is {self.data_format} and " 67 f"the platform is {context.get_context('device_target')}.") 68 if isinstance(padding, int): 69 Validator.check_non_negative_int(padding, 'padding', self.cls_name) 70 self.padding = padding 71 elif isinstance(padding, tuple): 72 for pad in padding: 73 Validator.check_non_negative_int(pad, 'padding item', self.cls_name) 74 self.padding = padding 75 else: 76 raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int or tuple(int), " 77 f"but got {type(padding).__name__}.") 78 79 self.dilation = dilation 80 self.group = Validator.check_positive_int(group) 81 self.has_bias = has_bias 82 for kernel_size_elem in kernel_size: 83 Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name) 84 for stride_elem in stride: 85 Validator.check_positive_int(stride_elem, 'stride item', self.cls_name) 86 for dilation_elem in dilation: 87 Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name) 88 if in_channels % group != 0: 89 raise ValueError(f"For '{self.cls_name}', the attr 'in_channels' must be divisible 
by attr 'group', " 90 f"but got 'in_channels': {in_channels} and 'group': {group}.") 91 if out_channels % group != 0: 92 raise ValueError(f"For '{self.cls_name}', the 'out_channels' must be divisible by attr 'group', " 93 f"but got 'out_channels': {out_channels} and 'group': {group}.") 94 if transposed: 95 shape = [in_channels, out_channels // group, *kernel_size] 96 else: 97 shape = [out_channels, *kernel_size, in_channels // group] if self.data_format == "NHWC" else \ 98 [out_channels, in_channels // group, *kernel_size] 99 if weight_init is None: 100 weight_init = HeUniform(math.sqrt(5)) 101 self.weight_init = weight_init 102 self.weight = Parameter(initializer(self.weight_init, shape, dtype=dtype), name='weight') 103 104 self.bias_init = bias_init 105 if Validator.check_bool(has_bias, "has_bias", self.cls_name): 106 if bias_init is None: 107 fan_in, _ = _calculate_fan_in_and_fan_out(shape) 108 if fan_in != 0: 109 bound = 1 / math.sqrt(fan_in) 110 bias_init = Uniform(bound) 111 else: 112 bias_init = 'zeros' 113 self.bias_init = bias_init 114 self.bias = Parameter(initializer(self.bias_init, [out_channels], dtype=dtype), name='bias') 115 else: 116 self.bias = None 117 118 def construct(self, *inputs): 119 """Must be overridden by all subclasses.""" 120 raise NotImplementedError 121 122 def extend_repr(self): 123 s = 'input_channels={}, output_channels={}, kernel_size={}, ' \ 124 'stride={}, pad_mode={}, padding={}, dilation={}, ' \ 125 'group={}, has_bias={}, ' \ 126 'weight_init={}, bias_init={}, format={}'.format( 127 self.in_channels, 128 self.out_channels, 129 self.kernel_size, 130 self.stride, 131 self.pad_mode, 132 self.padding, 133 self.dilation, 134 self.group, 135 self.has_bias, 136 self.weight_init, 137 self.bias_init, 138 self.data_format) 139 return s 140 141 142class Conv2d(_Conv): 143 r""" 144 2D convolution layer. 
145 146 Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`, 147 where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is feature height, :math:`W` is feature width. 148 149 The output is calculated based on formula: 150 151 .. math:: 152 153 \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + 154 \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)}) 155 156 where :math:`bias` is the output channel bias, :math:`ccor` is 157 the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, 158 :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map. 159 160 Here are the indices' meanings: 161 162 - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`, 163 where :math:`N` is the batch size of the input. 164 165 - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`, 166 where :math:`C_{out}` is the number of 167 output channels, which is also equal to the number of kernels. 168 169 - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`, 170 where :math:`C_{in}` is the number of 171 input channels, which is also equal to the number of channels in the convolutional kernels. 172 173 Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th 174 output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional 175 kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input 176 channel in the :math:`i`-th batch of the input feature map. 177 178 The shape of the convolutional kernel is given by :math:`(\text{kernel_size[0]},\text{kernel_size[1]})`, 179 where :math:`\text{kernel_size[0]}` 180 and :math:`\text{kernel_size[1]}` are the height and width of the kernel, respectively. 
181 If we consider the input and output channels as well as the `group` parameter, the complete kernel shape 182 will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`, 183 where `group` is the number of groups dividing `x`'s input channel when applying group convolution. 184 185 For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition 186 <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. 187 188 Note: 189 On Ascend platform, only group convolution in depthwise convolution scenarios is supported. 190 That is, when `group>1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. 191 192 Args: 193 in_channels (int): The channel number of the input tensor of the Conv2d layer. 194 out_channels (int): The channel number of the output tensor of the Conv2d layer. 195 kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel. 196 The data type is an integer or a tuple of two integers. An integer represents the height 197 and width of the convolution kernel. A tuple of two integers represents the height 198 and width of the convolution kernel respectively. 199 stride (Union[int, tuple[int]], optional): The movement stride of the 2D convolution kernel. 200 The data type is an integer or a tuple of two or four integers. An integer represents the movement step size 201 in both height and width directions. A tuple of two integers represents the movement step size in the height 202 and width directions respectively. Default: ``1`` . 203 pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to: 204 ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"same"`` . 205 206 - ``"same"``: Pad the input around its edges so that the shape of input and output 207 are the same when `stride` is set to ``1``. 
208 The amount of padding to is calculated by the operator internally, If the amount is even, it is 209 uniformly distributed around the input, if it is odd, the excess amount goes to the right/bottom side. 210 If this mode is set, `padding` must be 0. 211 - ``"valid"``: No padding is applied to the input, and the output returns the maximum 212 possible height and width. Extra pixels that could not complete a full stride will 213 be discarded. If this mode is set, `padding` must be 0. 214 - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding 215 in the height and width directions is determined by the `padding` parameter. 216 If this mode is set, `padding` must be greater than or equal to 0. 217 218 padding (Union[int, tuple[int]], optional): The number of padding 219 on the height and width directions of the input. 220 The data type is an integer or a tuple of four integers. If `padding` is an integer, 221 then the top, bottom, left, and right padding are all equal to `padding`. 222 If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding 223 is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. 224 The value should be greater than or equal to 0. Default: ``0`` . 225 dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution. 226 It can be a single int or a tuple of 2 or 4 integers. A single int means the dilation size is the same 227 in both the height and width directions. A tuple of two ints represents the dilation size in 228 the height and width directions, respectively. For a tuple of four ints, the two ints correspond 229 to (N, C) dimension are treated as 1, and the two correspond to (H, W) dimensions is the 230 dilation size in the height and width directions respectively. 
231 Assuming :math:`dilation=(d0, d1)`, the convolutional kernel samples the input with a 232 spacing of :math:`d0-1` elements in the height direction and :math:`d1-1` elements in the width direction. 233 The values in the height and width dimensions are in the ranges [1, H] and [1, W], respectively. 234 Default: ``1`` . 235 group (int, optional): Splits filter into groups, `in_channels` and `out_channels` must be 236 divisible by `group`. If the group is equal to `in_channels` and `out_channels`, 237 this 2D convolution layer also can be called 2D depthwise convolution layer. Default: ``1`` . 238 has_bias (bool, optional): Whether the Conv2d layer has a bias parameter. Default: ``False`` . 239 weight_init (Union[Tensor, str, Initializer, numbers.Number], optional): Initialization method of 240 weight parameter. 241 It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, 242 values from ``'TruncatedNormal'`` , ``'Normal'`` , ``'Uniform'`` , ``'HeUniform'`` and ``'XavierUniform'`` 243 distributions as well as constant ``'One'`` and ``'Zero'`` distributions are possible. Alias 244 ``'xavier_uniform'`` , ``'he_uniform'`` , ``'ones'`` and ``'zeros'`` are acceptable. Uppercase and 245 lowercase are both acceptable. Refer to the values of 246 `Initializer <https://www.mindspore.cn/docs/en/master/api_python/mindspore.common.initializer.html>`_, 247 for more details. Default: ``None`` , weight will be initialized using ``'HeUniform'``. 248 bias_init (Union[Tensor, str, Initializer, numbers.Number], optional): Initialization method of bias parameter. 249 Available initialization methods are the same as 'weight_init'. Refer to the values of 250 `Initializer <https://www.mindspore.cn/docs/en/master/api_python/mindspore.common.initializer.html>`_, 251 for more details. Default: ``None`` , bias will be initialized using ``'Uniform'`` . 252 data_format (str, optional): The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` . 
253 Default: ``'NCHW'`` . (NHWC is only supported in GPU now.) 254 dtype (:class:`mindspore.dtype`): Dtype of Parameters. Default: ``mstype.float32`` . 255 256 Inputs: 257 - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` \ 258 or :math:`(N, H_{in}, W_{in}, C_{in})`. 259 260 Outputs: 261 Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C_{out})`. 262 263 pad_mode is ``'same'``: 264 265 .. math:: 266 \begin{array}{ll} \\ 267 H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[0]}}} \right \rceil \\ 268 W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[1]}}} \right \rceil \\ 269 \end{array} 270 271 pad_mode is ``'valid'``: 272 273 .. math:: 274 \begin{array}{ll} \\ 275 H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } 276 {\text{stride[0]}}} \right \rceil \\ 277 W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } 278 {\text{stride[1]}}} \right \rceil \\ 279 \end{array} 280 281 pad_mode is ``'pad'``: 282 283 .. math:: 284 \begin{array}{ll} \\ 285 H_{out} = \left \lfloor{\frac{H_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times 286 \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ 287 W_{out} = \left \lfloor{\frac{W_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times 288 \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ 289 \end{array} 290 291 Raises: 292 TypeError: If `in_channels`, `out_channels` or `group` is not an int. 293 TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int not a tuple. 294 ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. 295 ValueError: If `padding` is less than 0. 296 ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 297 ValueError: If `padding` is a tuple whose length is not equal to 4. 
298 ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0). 299 ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'. 300 301 Supported Platforms: 302 ``Ascend`` ``GPU`` ``CPU`` 303 304 Examples: 305 >>> import mindspore 306 >>> from mindspore import Tensor, nn 307 >>> import numpy as np 308 >>> net = nn.Conv2d(120, 240, 4, has_bias=False, weight_init='normal') 309 >>> x = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32) 310 >>> output = net(x).shape 311 >>> print(output) 312 (1, 240, 1024, 640) 313 """ 314 315 @cell_attr_register 316 def __init__(self, 317 in_channels, 318 out_channels, 319 kernel_size, 320 stride=1, 321 pad_mode='same', 322 padding=0, 323 dilation=1, 324 group=1, 325 has_bias=False, 326 weight_init=None, 327 bias_init=None, 328 data_format='NCHW', 329 dtype=mstype.float32): 330 """Initialize Conv2d.""" 331 kernel_size = twice(kernel_size) 332 stride = twice(stride) 333 self._dilation = dilation 334 dilation = twice(dilation) 335 Validator.check_positive_int(group, 'group', self.cls_name) 336 if not (in_channels % group == 0 and out_channels % group == 0): 337 raise ValueError(f"The argument 'group' should be divisible by 'in_channels' " \ 338 f"and 'out_channels', but got group:{group}, in_channels:{in_channels}, " \ 339 f"out_channels:{out_channels}.") 340 super(Conv2d, self).__init__( 341 in_channels, 342 out_channels, 343 kernel_size, 344 stride, 345 pad_mode, 346 padding, 347 dilation, 348 group, 349 has_bias, 350 weight_init, 351 bias_init, 352 data_format, 353 dtype=dtype) 354 self.conv2d = P.Conv2D(out_channel=self.out_channels, 355 kernel_size=self.kernel_size, 356 mode=1, 357 pad_mode=self.pad_mode, 358 pad=self.padding, 359 stride=self.stride, 360 dilation=self.dilation, 361 group=self.group, 362 data_format=self.data_format) 363 self.bias_add = P.BiasAdd(data_format=self.data_format) 364 365 def construct(self, x): 366 output = self.conv2d(x, self.weight) 367 if self.has_bias: 368 output 
= self.bias_add(output, self.bias) 369 return output 370 371 372class Conv1d(_Conv): 373 r""" 374 1D convolution layer. 375 376 Applies a 1D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, L_{in})`, 377 where :math:`N` is batch size, :math:`C` is channel number, :math:`L` is input sequence width. 378 379 The output is calculated based on formula: 380 381 .. math:: 382 383 \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + 384 \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)}) 385 386 where :math:`bias` is the output channel bias, :math:`ccor` is 387 the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, 388 :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map. 389 390 Here are the indices' meanings: 391 392 - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`, 393 where :math:`N` is the batch size of the input. 394 395 - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`, 396 where :math:`C_{out}` is the number of 397 output channels, which is also equal to the number of kernels. 398 399 - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`, 400 where :math:`C_{in}` is the number of 401 input channels, which is also equal to the number of channels in the convolutional kernels. 402 403 Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th 404 output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional 405 kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input 406 channel in the :math:`i`-th batch of the input feature map. 407 408 The shape of the convolutional kernel is given by :math:`(\text{kernel_size})`, 409 where :math:`\text{kernel_size}` is the width of the kernel. 
410 If we consider the input and output channels as well as the `group` parameter, the complete kernel shape 411 will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`, 412 where `group` is the number of groups dividing `x`'s input channel when applying group convolution. 413 414 For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition 415 <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ 416 and `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ . 417 418 Note: 419 On Ascend platform, only group convolution in depthwise convolution scenarios is supported. 420 That is, when `group>1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. 421 422 Args: 423 in_channels (int): The channel number of the input tensor of the Conv1d layer. 424 out_channels (int): The channel number of the output tensor of the Conv1d layer. 425 kernel_size (int): Specifies the width of the 1D convolution kernel. 426 stride (int, optional): The movement stride of the 1D convolution kernel. Default: ``1`` . 427 pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to: 428 ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"same"`` . 429 430 - ``"same"``: Pad the input at the begin and end so that the shape of input and output 431 are the same when `stride` is set to ``1``. 432 The amount of padding to is calculated by the operator internally. If the amount is even, it is 433 uniformly distributed around the input, if it is odd, the excess padding is goes to the right side. 434 If this mode is set, `padding` must be 0. 435 - ``"valid"``: No padding is applied to the input, and the output returns the maximum 436 possible length. Extra pixels that could not complete a full stride will 437 be discarded. If this mode is set, `padding` must be 0. 438 - ``"pad"``: Pad the input with a specified amount. 
In this mode, the amount of padding 439 at the begin and end is determined by the `padding` parameter. 440 If this mode is set, `padding` must be greater than or equal to 0. 441 442 padding (Union(int, tuple[int], list[int]), optional): Specifies the amount of padding to apply on 443 both side of `input` when `pad_mode` is set to ``"pad"``. The 444 paddings of left and right are the same, equal to padding or padding[0] when padding is a tuple of 445 1 integer. Default: ``0`` . 446 dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution. 447 It can be a single int or a tuple of 1 integer. 448 Assuming :math:`dilation=(d0,)`, the convolutional kernel samples the input with a 449 spacing of :math:`d0-1` elements in the width direction. 450 The value should be in the ranges [1, L]. 451 Default: ``1`` . 452 group (int, optional): Splits filter into groups, `in_channels` and `out_channels` must be 453 divisible by `group`. Default: ``1`` . 454 has_bias (bool, optional): Whether the Conv1d layer has a bias parameter. Default: ``False`` . 455 weight_init (Union[Tensor, str, Initializer, numbers.Number], optional): 456 Initialization method of weight parameter. 457 It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, 458 values from ``'TruncatedNormal'`` , ``'Normal'`` , ``'Uniform'`` , ``'HeUniform'`` and ``'XavierUniform'`` 459 distributions as well as constant 'One' and 'Zero' distributions are possible. Alias ``'xavier_uniform'`` , 460 ``'he_uniform'`` , ``'ones'`` and ``'zeros'`` are acceptable. Uppercase and lowercase are both acceptable. 461 Refer to the values of 462 `Initializer <https://www.mindspore.cn/docs/en/master/api_python/mindspore.common.initializer.html>`_, 463 for more details. Default: ``None`` , weight will be initialized using ``'HeUniform'``. 464 bias_init (Union[Tensor, str, Initializer, numbers.Number], optional): Initialization method of bias parameter. 
465 Available initialization methods are the same as 'weight_init'. Refer to the values of 466 `Initializer <https://www.mindspore.cn/docs/en/master/api_python/mindspore.common.initializer.html>`_, 467 for more details. Default: ``None`` , bias will be initialized using ``'Uniform'``. 468 dtype (:class:`mindspore.dtype`): Dtype of Parameters. Default: ``mstype.float32`` . 469 470 Inputs: 471 - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, L_{in})` . 472 473 Outputs: 474 Tensor of shape :math:`(N, C_{out}, L_{out})`. 475 476 pad_mode is ``'same'``: 477 478 .. math:: 479 L_{out} = \left \lceil{\frac{L_{in}}{\text{stride}}} \right \rceil 480 481 pad_mode is ``'valid'``: 482 483 .. math:: 484 L_{out} = \left \lceil{\frac{L_{in} - \text{dilation} \times (\text{kernel_size} - 1) } 485 {\text{stride}}} \right \rceil 486 487 pad_mode is ``'pad'``: 488 489 .. math:: 490 L_{out} = \left \lfloor{\frac{L_{in} + 2 \times padding - (\text{kernel_size} - 1) \times 491 \text{dilation} - 1 }{\text{stride}} + 1} \right \rfloor 492 493 Raises: 494 TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int. 495 ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. 496 ValueError: If `padding` is less than 0. 497 ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 
498 499 Supported Platforms: 500 ``Ascend`` ``GPU`` ``CPU`` 501 502 Examples: 503 >>> import mindspore 504 >>> from mindspore import Tensor, nn 505 >>> import numpy as np 506 >>> net = nn.Conv1d(120, 240, 4, has_bias=False, weight_init='normal') 507 >>> x = Tensor(np.ones([1, 120, 640]), mindspore.float32) 508 >>> output = net(x).shape 509 >>> print(output) 510 (1, 240, 640) 511 """ 512 513 @cell_attr_register 514 def __init__(self, 515 in_channels, 516 out_channels, 517 kernel_size, 518 stride=1, 519 pad_mode='same', 520 padding=0, 521 dilation=1, 522 group=1, 523 has_bias=False, 524 weight_init=None, 525 bias_init=None, 526 dtype=mstype.float32): 527 """Initialize Conv1d.""" 528 Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name) 529 Validator.check_value_type("stride", stride, [int], self.cls_name) 530 Validator.check_value_type("padding", padding, [int], self.cls_name) 531 Validator.check_value_type("dilation", dilation, [int], self.cls_name) 532 Validator.check_int(kernel_size, 1, Validator.GE, 'kernel_size', self.cls_name) 533 Validator.check_int(stride, 1, Validator.GE, 'stride', self.cls_name) 534 Validator.check_non_negative_int(padding, 'padding', self.cls_name) 535 Validator.check_int(dilation, 1, Validator.GE, 'dilation', self.cls_name) 536 Validator.check_positive_int(group, 'group', self.cls_name) 537 if not (in_channels % group == 0 and out_channels % group == 0): 538 raise ValueError(f"The argument 'group' should be divisible by 'in_channels' " \ 539 f"and 'out_channels', but got group:{group}, in_channels:{in_channels}, " \ 540 f"out_channels:{out_channels}.") 541 kernel_size = (1, kernel_size) 542 stride = (1, stride) 543 dilation = (1, dilation) 544 get_shape = P.Shape() 545 get_dtype = P.DType() 546 if isinstance(weight_init, Tensor): 547 weight_init_shape = get_shape(weight_init) 548 Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name) 549 weight_init_dtype = get_dtype(weight_init) 
550 weight_init_value = weight_init.asnumpy() 551 weight_init_value = np.expand_dims(weight_init_value, 2) 552 weight_init = Tensor(weight_init_value, weight_init_dtype) 553 554 super(Conv1d, self).__init__( 555 in_channels, 556 out_channels, 557 kernel_size, 558 stride, 559 pad_mode, 560 padding, 561 dilation, 562 group, 563 has_bias, 564 weight_init, 565 bias_init, 566 dtype=dtype) 567 self.padding = (0, 0, padding, padding) 568 Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name) 569 self.conv2d = P.Conv2D(out_channel=self.out_channels, 570 kernel_size=self.kernel_size, 571 mode=1, 572 pad_mode=self.pad_mode, 573 pad=self.padding, 574 stride=self.stride, 575 dilation=self.dilation, 576 group=self.group) 577 self.bias_add = P.BiasAdd() 578 self.expand_dims = P.ExpandDims() 579 self.squeeze = P.Squeeze(2) 580 self.shape = P.Shape() 581 582 def construct(self, x): 583 x = self.expand_dims(x, 2) 584 output = self.conv2d(x, self.weight) 585 if self.has_bias: 586 output = self.bias_add(output, self.bias) 587 588 output = self.squeeze(output) 589 return output 590 591 592class Conv3d(_Conv): 593 r""" 594 3D convolution layer. 595 596 Applies a 3D convolution over an input tensor which is typically of shape 597 :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is channel number, 598 :math:`D, H, W` are the depth, height and width of the feature map, respectively. 599 600 The output is calculated based on formula: 601 602 .. math:: 603 604 \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + 605 \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)}) 606 607 where :math:`bias` is the output channel bias, :math:`ccor` is 608 the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, 609 :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map. 
610 611 Here are the indices' meanings: 612 613 - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`, 614 where :math:`N` is the batch size of the input. 615 616 - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`, 617 where :math:`C_{out}` is the number of 618 output channels, which is also equal to the number of kernels. 619 620 - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`, 621 where :math:`C_{in}` is the number of 622 input channels, which is also equal to the number of channels in the convolutional kernels. 623 624 Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th 625 output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional 626 kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input 627 channel in the :math:`i`-th batch of the input feature map. 628 629 The shape of the convolutional kernel is given by 630 :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})` 631 where :math:`\text{kernel_size[0]}` , :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are the depth, 632 height and width of the kernel, respectively. 633 If we consider the input and output channels as well as the `group` parameter, the complete kernel shape 634 will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, 635 \text{kernel_size[1]}, \text{kernel_size[2]})`, 636 where `group` is the number of groups dividing `x`'s input channel when applying group convolution. 637 638 For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition 639 <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. 640 641 Note: 642 On Ascend platform, only group convolution in depthwise convolution scenarios is supported. 
643 That is, when `group>1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. 644 645 Args: 646 in_channels (int): The channel number of the input tensor of the Conv3d layer. 647 out_channels (int): The channel number of the output tensor of the Conv3d layer. 648 kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. 649 It can be a single int or a tuple of 3 integers. A single int means the value is for depth, height 650 and the width. A tuple of 3 ints means the first value is 651 for depth and the rest is for the height and width. 652 stride (Union[int, tuple[int]], optional): The movement stride of the 3D convolution kernel. 653 The data type is an integer or a tuple of three integers. An integer represents the movement step size 654 in depth, height and width directions. A tuple of three integers represents the movement step size 655 in the depth, height and width directions respectively. Default: ``1`` . 656 pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to: 657 ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"same"`` . 658 659 - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output 660 are the same when `stride` is set to ``1``. 661 The amount of padding to is calculated by the operator internally. If the amount is even, 662 it isuniformly distributed around the input, if it is odd, the excess amount goes 663 to the front/right/bottom side. 664 If this mode is set, `padding` must be 0. 665 - ``"valid"``: No padding is applied to the input, and the output returns the maximum 666 possible depth, height and width. Extra pixels that could not complete a full stride will 667 be discarded. If this mode is set, `padding` must be 0. 668 - ``"pad"``: Pad the input with a specified amount. 
In this mode, the amount of padding 669 in the depth, height and width dimension is determined by the `padding` parameter. 670 If this mode is set, `padding` must be greater than or equal to 0. 671 672 padding (Union(int, tuple[int]), optional): The number of padding on the depth, 673 height and width directions of the input. 674 The data type is an integer or a tuple of six integers. If `padding` is an integer, 675 then the head, tail, top, bottom, left, and right padding are all equal to `padding`. 676 If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding 677 is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` 678 respectively. The value should be greater than or equal to 0. Default: ``0`` . 679 dilation (Union[int, tuple[int]], optional): Specifies the dilation rate to use for dilated convolution. 680 It can be a single int or a tuple of 3 integers. A single int means the dilation size is the same 681 in the depth, height and width directions. A tuple of 3 ints represents the dilation size in 682 the depth, height and width directions, respectively. 683 Assuming :math:`dilation=(d0, d1, d2)`, the convolutional kernel samples the input with a 684 spacing of :math:`d0-1` elements in the depth direction, :math:`d1-1` elements in the height direction, 685 :math:`d2-1` elements in the width direction respectively. 686 The values in the depth, height and width dimensions are in 687 the ranges [1, D], [1, H] and [1, W], respectively. 688 Default: ``1`` . 689 group (int, optional): Splits filter into groups, `in_channels` and `out_channels` must be 690 divisible by `group`. Default: ``1`` . 691 has_bias (bool, optional): Whether the Conv3d layer has a bias parameter. Default: ``False`` . 692 weight_init (Union[Tensor, str, Initializer, numbers.Number], optional): 693 Initialization method of weight parameter. 694 It can be a Tensor, a string, an Initializer or a numbers.Number. 
When a string is specified, 695 values from ``'TruncatedNormal'`` , ``'Normal'`` , ``'Uniform'`` , ``'HeUniform'`` and ``'XavierUniform'`` 696 distributions as well as constant ``'One'`` and ``'Zero'`` distributions are possible. Alias 697 ``'xavier_uniform'`` , ``'he_uniform'`` , ``'ones'`` and ``'zeros'`` are acceptable. Uppercase and 698 lowercase are both acceptable. Refer to the values of 699 `Initializer <https://www.mindspore.cn/docs/en/master/api_python/mindspore.common.initializer.html>`_, 700 for more details. Default: ``None`` , weight will be initialized using ``'HeUniform'``. 701 bias_init (Union[Tensor, str, Initializer, numbers.Number], optional): Initialization method of bias parameter. 702 Available initialization methods are the same as 'weight_init'. Refer to the values of 703 `Initializer <https://www.mindspore.cn/docs/en/master/api_python/mindspore.common.initializer.html>`_, 704 for more details. Default: ``None`` , bias will be initialized using ``'Uniform'`` . 705 data_format (str, optional): The optional value for data format. Currently only support ``'NCDHW'`` . 706 dtype (:class:`mindspore.dtype`): Dtype of Parameters. Default: ``mstype.float32`` . 707 708 709 Inputs: 710 - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`. 711 Currently, input data type support float16 and float32 in CPU and GPU, and only float16 in Ascend. 712 713 Outputs: 714 Tensor of shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`. 715 716 pad_mode is ``'same'`` : 717 718 .. math:: 719 \begin{array}{ll} \\ 720 D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\ 721 H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\ 722 W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\ 723 \end{array} 724 725 726 pad_mode is ``'valid'`` : 727 728 .. 
math:: 729 \begin{array}{ll} \\ 730 D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } 731 {\text{stride[0]}} + 1} \right \rfloor \\ 732 H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } 733 {\text{stride[1]}} + 1} \right \rfloor \\ 734 W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) } 735 {\text{stride[2]}} + 1} \right \rfloor \\ 736 \end{array} 737 738 pad_mode is ``'pad'`` : 739 740 .. math:: 741 \begin{array}{ll} \\ 742 D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times 743 \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ 744 H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times 745 \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ 746 W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times 747 \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ 748 \end{array} 749 750 Raises: 751 TypeError: If `in_channels`, `out_channels` or `group` is not an int. 752 TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. 753 ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. 754 ValueError: If `padding` is less than 0. 755 ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 756 ValueError: If `padding` is a tuple whose length is not equal to 6. 757 ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). 758 ValueError: If `data_format` is not 'NCDHW'. 
@cell_attr_register
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             pad_mode='same',
             padding=0,
             dilation=1,
             group=1,
             has_bias=False,
             weight_init=None,
             bias_init=None,
             data_format='NCDHW',
             dtype=mstype.float32):
    """Initialize Conv3d.

    Validates `group`, `kernel_size`, `stride`, `dilation` and `padding`, delegates the common
    set-up to `_Conv.__init__`, then builds the primitives used by `construct`.

    Raises:
        TypeError: If `group` is not an int, or `padding` is neither an int nor a tuple.
        ValueError: If `group` is less than 1, if `in_channels` or `out_channels` is not divisible
            by `group`, or if `padding` is a tuple whose length is not 6.
    """
    # `group` is used as a divisor right below, so validate it first: 0 or a non-int is then
    # reported as a parameter error instead of surfacing as ZeroDivisionError.
    Validator.check_positive_int(group, 'group', self.cls_name)
    if in_channels % group != 0 or out_channels % group != 0:
        raise ValueError(f"For '{self.cls_name}', 'in_channels' and 'out_channels' must be divisible by "
                         f"'group', but got 'in_channels': {in_channels}, 'out_channels': {out_channels}, "
                         f"'group': {group}.")

    kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name)
    stride = _check_3d_int_or_tuple("stride", stride, self.cls_name)
    dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name)
    Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
    if isinstance(padding, tuple):
        # A tuple must give head, tail, top, bottom, left and right padding.
        Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name)
    super(Conv3d, self).__init__(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        pad_mode,
        padding,
        dilation,
        group,
        has_bias,
        weight_init,
        bias_init,
        data_format,
        dtype=dtype)
    # The primitive is created with group=1 and the per-group output channel count:
    # `construct` performs group convolution itself by splitting input and weight.
    out_channels = self.out_channels // group
    self.conv3d = P.Conv3D(out_channel=out_channels,
                           kernel_size=self.kernel_size,
                           mode=1,
                           pad_mode=self.pad_mode,
                           pad=self.padding,
                           stride=self.stride,
                           dilation=self.dilation,
                           group=1,
                           data_format=self.data_format)
    self.bias_add = P.BiasAdd(data_format=self.data_format)
    self.shape = P.Shape()
    self.concat = P.Concat(1)
    self.split_0 = P.Split(0, self.group)
    self.split_1 = P.Split(1, self.group)


def construct(self, x):
    """Apply the 3D convolution to `x` and return the result.

    With `group == 1` a single `P.Conv3D` call is used. Otherwise `x` is split along axis 1
    and the weight along axis 0 into `group` pieces, each pair is convolved separately and
    the partial results are concatenated along axis 1.
    """
    if self.group == 1:
        out = self.conv3d(x, self.weight)
        if self.has_bias:
            out = self.bias_add(out, self.bias)
    else:
        features = self.split_1(x)
        weights = self.split_0(self.weight)
        # NOTE(review): kept as an explicit loop building a tuple; presumably this must stay
        # within the syntax accepted for `construct` in graph mode - confirm before restyling.
        outputs = ()
        for i in range(self.group):
            output = self.conv3d(features[i], weights[i])
            outputs = outputs + (output,)
        out = self.concat(outputs)
        if self.bias is not None:
            # Give the bias a shape of all ones except axis 1 so it broadcasts over `out`.
            new_shape = [1 for _ in range(out.ndim)]
            new_shape[1] = self.out_channels
            out = out + self.bias.reshape(new_shape)
    return out
868 kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. 869 The data type is an integer or a tuple of three integers. An integer represents the depth, height 870 and width of the convolution kernel. A tuple of three integers represents the depth, height 871 and width of the convolution kernel respectively. 872 stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. 873 The data type is an integer or a tuple of three integers. An integer represents the movement step size 874 in depth, height and width directions. A tuple of three integers represents the movement step size 875 in the depth, height and width directions respectively. Default: ``1`` . 876 pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to: 877 ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"same"`` . 878 879 - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output 880 are the same when `stride` is set to ``1``. 881 The amount of padding is calculated by the operator internally. If the amount is even, 882 it is uniformly distributed around the input, if it is odd, the excess amount goes 883 to the front/right/bottom side. 884 If this mode is set, `padding` must be 0. 885 - ``"valid"``: No padding is applied to the input, and the output returns the maximum 886 possible depth, height and width. Extra pixels that could not complete a full stride will 887 be discarded. If this mode is set, `padding` must be 0. 888 - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding 889 in the depth, height and width dimension is determined by the `padding` parameter. 890 If this mode is set, `padding` must be greater than or equal to 0. 891 892 padding (Union[int, tuple[int]]): The number of padding on the depth, height and width directions of the input.
893 The data type is an integer or a tuple of six integers. If `padding` is an integer, 894 then the head, tail, top, bottom, left, and right padding are all equal to `padding`. 895 If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding 896 is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` 897 respectively. The value should be greater than or equal to 0. Default: ``0`` . 898 dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. The data type 899 can be a single int or a tuple of 3 integers. A single int means the dilation size is the same in the 900 depth, height and width directions. A tuple of 3 ints represents the dilation size in the depth, height 901 and width directions, respectively. 902 Assuming :math:`dilation=(d0, d1, d2)`, the convolutional kernel samples the input with a 903 spacing of :math:`d0-1` elements in the depth direction, :math:`d1-1` elements in the height direction, 904 :math:`d2-1` elements in the width direction respectively. 905 The values in the depth, height and width dimensions are in 906 the ranges [1, D], [1, H] and [1, W], respectively. 907 Default: ``1`` . 908 group (int): Splits filter into groups, `in_channels` and `out_channels` must be 909 divisible by `group`. Default: ``1`` . 910 output_padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of 911 the output. The data type is an integer or a tuple of three integers. If `output_padding` is an integer, 912 then the depth, height, and width dimension padding are all equal to `output_padding`. 913 If `output_padding` is a tuple of three integers, then the depth, height, and width padding is equal to 914 `output_padding[0]`, `output_padding[1]` and `output_padding[2]` respectively. 915 The value should be greater than or equal to 0. 916 Default: ``0`` . 
917 has_bias (bool): Whether the Conv3dTranspose layer has a bias parameter. Default: ``False`` . 918 weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. 919 It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, 920 values from ``'TruncatedNormal'`` , ``'Normal'`` , ``'Uniform'`` , ``'HeUniform'`` and ``'XavierUniform'`` 921 distributions as well as constant ``'One'`` and ``'Zero'`` distributions are possible. Alias 922 ``'xavier_uniform'`` , ``'he_uniform'`` , ``'ones'`` and ``'zeros'`` are acceptable. Uppercase and 923 lowercase are both acceptable. Refer to the values of Initializer for more details. Default: ``None`` , 924 weight will be initialized using HeUniform. 925 bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. 926 Available initialization methods are the same as 'weight_init'. Refer to the values of 927 Initializer for more details. Default: ``None`` , bias will be initialized using Uniform. 928 data_format (str): The optional value for data format. Currently only support ``'NCDHW'`` . 929 Default: ``'NCDHW'`` . 930 dtype (:class:`mindspore.dtype`): Dtype of Parameters. Default: ``mstype.float32`` . 931 932 Inputs: 933 - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`. 934 Currently input data dtype only support float16 and float32. 935 936 Outputs: 937 Tensor, the shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`. 938 939 pad_mode is ``'same'`` : 940 941 .. math:: 942 \begin{array}{ll} \\ 943 D_{out} = \left \lfloor{\frac{D_{in}}{\text{stride[0]}} + 1} \right \rfloor \\ 944 H_{out} = \left \lfloor{\frac{H_{in}}{\text{stride[1]}} + 1} \right \rfloor \\ 945 W_{out} = \left \lfloor{\frac{W_{in}}{\text{stride[2]}} + 1} \right \rfloor \\ 946 \end{array} 947 948 949 pad_mode is ``'valid'`` : 950 951 .. 
math:: 952 \begin{array}{ll} \\ 953 D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } 954 {\text{stride[0]}} + 1} \right \rfloor \\ 955 H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } 956 {\text{stride[1]}} + 1} \right \rfloor \\ 957 W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) } 958 {\text{stride[2]}} + 1} \right \rfloor \\ 959 \end{array} 960 961 pad_mode is ``'pad'`` : 962 963 .. math:: 964 \begin{array}{ll} \\ 965 D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times 966 \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ 967 H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times 968 \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ 969 W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times 970 \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ 971 \end{array} 972 973 Raises: 974 TypeError: If `in_channels`, `out_channels` or `group` is not an int. 975 TypeError: If `kernel_size`, `stride`, `padding` , `dilation` or `output_padding` 976 is neither an int nor a tuple of three. 977 TypeError: If input data type is not float16 or float32. 978 ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. 979 ValueError: If `padding` is less than 0. 980 ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 981 ValueError: If `padding` is a tuple whose length is not equal to 6. 982 ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). 983 ValueError: If `data_format` is not 'NCDHW'. 
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             pad_mode="same",
             padding=0,
             dilation=1,
             group=1,
             output_padding=0,
             has_bias=False,
             weight_init=None,
             bias_init=None,
             data_format='NCDHW',
             dtype=mstype.float32):
    """Initialize Conv3dTranspose.

    Validates `group`, `kernel_size`, `stride`, `dilation`, `padding` and `output_padding`,
    delegates the common set-up to `_Conv.__init__` with `transposed=True`, then builds the
    `P.Conv3DTranspose` and `P.BiasAdd` primitives used by `construct`.

    Raises:
        TypeError: If `group` is not an int, or `padding` is neither an int nor a tuple.
        ValueError: If `group` is less than 1, if `in_channels` or `out_channels` is not divisible
            by `group`, or if `padding` is a tuple whose length is not 6.
    """
    # `group` is used as a divisor right below, so validate it first: 0 or a non-int is then
    # reported as a parameter error instead of surfacing as ZeroDivisionError.
    Validator.check_positive_int(group, 'group', self.cls_name)
    if in_channels % group != 0 or out_channels % group != 0:
        raise ValueError(f"For '{self.cls_name}', 'in_channels' and 'out_channels' must be divisible by "
                         f"'group', but got 'in_channels': {in_channels}, 'out_channels': {out_channels}, "
                         f"'group': {group}.")

    kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name)
    stride = _check_3d_int_or_tuple("stride", stride, self.cls_name)
    dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name)
    Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
    if isinstance(padding, tuple):
        # A tuple must give head, tail, top, bottom, left and right padding.
        Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name)
    # `greater_zero=False`: the documented contract allows 0 for output_padding.
    self.output_padding = _check_3d_int_or_tuple("output_padding", output_padding, self.cls_name,
                                                 greater_zero=False)
    super(Conv3dTranspose, self).__init__(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        pad_mode,
        padding,
        dilation,
        group,
        has_bias,
        weight_init,
        bias_init,
        data_format,
        transposed=True,
        dtype=dtype)
    self.conv3d_transpose = P.Conv3DTranspose(in_channel=self.in_channels,
                                              out_channel=self.out_channels,
                                              kernel_size=self.kernel_size,
                                              mode=1,
                                              pad_mode=self.pad_mode,
                                              pad=self.padding,
                                              stride=self.stride,
                                              dilation=self.dilation,
                                              group=self.group,
                                              output_padding=self.output_padding,
                                              data_format=self.data_format)
    self.bias_add = P.BiasAdd(data_format=self.data_format)
    self.shape = P.Shape()


def construct(self, x):
    """Apply the 3D transposed convolution to `x`, adding the bias when `has_bias` is set."""
    output = self.conv3d_transpose(x, self.weight)
    if self.has_bias:
        output = self.bias_add(output, self.bias)
    return output


def _deconv_output_length(is_valid, is_same, is_pad, input_length, filter_size, stride_size, dilation_size, padding):
    """Calculate the output length of a transposed convolution along one spatial dimension.

    Args:
        is_valid (bool): Whether the pad mode is 'valid'.
        is_same (bool): Whether the pad mode is 'same'.
        is_pad (bool): Whether the pad mode is 'pad'.
        input_length (int): Input size along the dimension.
        filter_size (int): Kernel size along the dimension, before dilation.
        stride_size (int): Stride along the dimension.
        dilation_size (int): Dilation along the dimension.
        padding (int): Total padding along the dimension; only used when `is_pad` is true.

    Returns:
        int, the output size along the dimension, or 0 when none of the three mode flags is set.
    """
    length = 0
    # Extent actually covered by the kernel once dilation gaps are included.
    filter_size = filter_size + (filter_size - 1) * (dilation_size - 1)
    if is_valid:
        # The kernel only extends the output when it is wider than the stride.
        if filter_size - stride_size > 0:
            length = input_length * stride_size + filter_size - stride_size
        else:
            length = input_length * stride_size
    elif is_same:
        length = input_length * stride_size
    elif is_pad:
        length = input_length * stride_size - padding + filter_size - stride_size

    return length
1093 However, when `stride` > 1, Conv2d maps multiple input shapes to the same output shape. Deconvolutional network 1094 can refer to `Deconvolutional Networks <https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf>`_. 1095 1096 Args: 1097 in_channels (int): The channel number of the input tensor of the Conv2dTranspose layer. 1098 out_channels (int): The channel number of the output tensor of the Conv2dTranspose layer. 1099 kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel. 1100 The data type is an integer or a tuple of two integers. An integer represents the height 1101 and width of the convolution kernel. A tuple of two integers represents the height 1102 and width of the convolution kernel respectively. 1103 stride (Union[int, tuple[int]]): The movement stride of the 2D convolution kernel. 1104 The data type is an integer or a tuple of two integers. An integer represents the movement step size 1105 in both height and width directions. A tuple of two integers represents the movement step size in the height 1106 and width directions respectively. Default: ``1`` . 1107 pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to: 1108 ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"same"`` . 1109 1110 - ``"same"``: Pad the input around its edges so that the shape of input and output 1111 are the same when `stride` is set to ``1``. 1112 The amount of padding is calculated by the operator internally. If the amount is even, it is 1113 uniformly distributed around the input, if it is odd, the excess amount goes to the right/bottom side. 1114 If this mode is set, `padding` must be 0. 1115 - ``"valid"``: No padding is applied to the input, and the output returns the maximum 1116 possible height and width. Extra pixels that could not complete a full stride will 1117 be discarded. If this mode is set, `padding` must be 0.
1118 - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding 1119 in the height and width directions is determined by the `padding` parameter. 1120 If this mode is set, `padding` must be greater than or equal to 0. 1121 1122 padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the input. 1123 The data type is an integer or a tuple of four integers. If `padding` is an integer, 1124 then the top, bottom, left, and right padding are all equal to `padding`. 1125 If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding 1126 is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. 1127 The value should be greater than or equal to 0. Default: ``0`` . 1128 output_padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the output. 1129 The data type is an integer or a tuple of two integers. If `output_padding` is an integer, 1130 then the bottom and right padding are all equal to `output_padding`. If `output_padding` is a tuple of 1131 2 integers, then the bottom and right padding is equal to `output_padding[0]`, `output_padding[1]` 1132 respectively. If `output_padding` is not equal to 0, `pad_mode` must be `pad`. 1133 The value should be in range of `[0, max(stride, dilation))` . Default: ``0`` . 1134 dilation (Union[int, tuple[int]]): Dilation size of 2D convolution kernel. 1135 It can be a single int or a tuple of 2 integers. A single int means the dilation size is the same 1136 in both the height and width directions. A tuple of two ints represents the dilation size in 1137 the height and width directions, respectively. 1138 Assuming :math:`dilation=(d0, d1)`, the convolutional kernel samples the input with a 1139 spacing of :math:`d0-1` elements in the height direction and :math:`d1-1` elements in the width direction. 
1140 The values in the height and width dimensions are in the ranges [1, H] and [1, W], respectively. 1141 Default: ``1`` . 1142 group (int): Splits filter into groups, `in_channels` and `out_channels` must be divisible by `group`. 1143 Default: ``1`` . 1144 has_bias (bool): Whether the Conv2dTranspose layer has a bias parameter. Default: ``False`` . 1145 weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. 1146 It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, 1147 values from ``'TruncatedNormal'`` , ``'Normal'`` , ``'Uniform'`` , ``'HeUniform'`` and ``'XavierUniform'`` 1148 distributions as well as constant ``'One'`` and ``'Zero'`` distributions are possible. Alias 1149 ``'xavier_uniform'`` , ``'he_uniform'`` , ``'ones'`` and ``'zeros'`` are acceptable. Uppercase and 1150 lowercase are both acceptable. Refer to the values of Initializer for more details. Default: ``None`` , 1151 weight will be initialized using HeUniform. 1152 bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. 1153 Available initialization methods are the same as 'weight_init'. Refer to the values of 1154 Initializer for more details. Default: ``None`` , bias will be initialized using Uniform. 1155 dtype (:class:`mindspore.dtype`): Dtype of Parameters. Default: ``mstype.float32`` . 1156 1157 Inputs: 1158 - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. 1159 1160 Outputs: 1161 Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. 1162 1163 pad_mode is ``'same'``: 1164 1165 .. math:: 1166 \begin{array}{ll} \\ 1167 H_{out} = \text H_{in}\times \text {stride[0]} \\ 1168 W_{out} = \text W_{in}\times \text {stride[1]} \\ 1169 \end{array} 1170 1171 pad_mode is ``'valid'``: 1172 1173 .. 
math:: 1174 \begin{array}{ll} \\ 1175 H_{out} = \text H_{in}\times \text {stride[0]} + \max\{\text{kernel_size[0]} + (\text{dilation[0]} - 1) \times 1176 (\text{kernel_size[0]} - 1) - \text {stride[0]}, 0 \} \\ 1177 W_{out} = \text W_{in}\times \text {stride[1]} + \max\{\text{kernel_size[1]} + (\text{dilation[1]} - 1) \times 1178 (\text{kernel_size[1]} - 1) - \text {stride[1]}, 0 \} \\ 1179 \end{array} 1180 1181 pad_mode is ``'pad'``: 1182 1183 .. math:: 1184 \begin{array}{ll} \\ 1185 H_{out} = \text H_{in}\times \text {stride[0]} - (padding[0] + padding[1]) 1186 + \text{kernel_size[0]} + (\text{dilation[0]} - 1) \times 1187 (\text{kernel_size[0]} - 1) - \text {stride[0]} + \text {output_padding[0]} \\ 1188 W_{out} = \text W_{in}\times \text {stride[1]} - (padding[2] + padding[3]) 1189 + \text{kernel_size[1]} + (\text{dilation[1]} - 1) \times 1190 (\text{kernel_size[1]} - 1) - \text {stride[1]} + \text {output_padding[1]} \\ 1191 \end{array} 1192 1193 Raises: 1194 TypeError: If `in_channels`, `out_channels` or `group` is not an int. 1195 TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. 1196 ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. 1197 ValueError: If `padding` is less than 0. 1198 ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 1199 ValueError: If `padding` is a tuple whose length is not equal to 4. 1200 ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             pad_mode='same',
             padding=0,
             output_padding=0,
             dilation=1,
             group=1,
             has_bias=False,
             weight_init=None,
             bias_init=None,
             dtype=mstype.float32):
    """Initialize Conv2dTranspose.

    Expands `kernel_size`, `stride` and `dilation` with `twice`, checks the type (and tuple
    length) of `padding` and `output_padding`, delegates the common set-up to `_Conv.__init__`
    with `transposed=True`, then builds the primitives and cached flags used by `construct`.
    """
    kernel_size = twice(kernel_size)
    stride = twice(stride)
    dilation = twice(dilation)

    # `padding`: an int or a 4-tuple; `output_padding`: an int or a 2-tuple.
    Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
    if isinstance(padding, tuple):
        Validator.check_equal_int(len(padding), 4, 'padding size', self.cls_name)
    Validator.check_value_type('output_padding', output_padding, (int, tuple), self.cls_name)
    if isinstance(output_padding, tuple):
        Validator.check_equal_int(len(output_padding), 2, 'output_padding size', self.cls_name)

    super(Conv2dTranspose, self).__init__(
        in_channels, out_channels, kernel_size, stride, pad_mode, padding, dilation, group,
        has_bias, weight_init, bias_init, transposed=True, dtype=dtype)

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.shape = P.Shape()
    Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
    # Cache the pad mode as three booleans for `_deconv_output_length`.
    self.is_valid = self.pad_mode == 'valid'
    self.is_same = self.pad_mode == 'same'
    self.is_pad = self.pad_mode == 'pad'
    self.output_padding = output_padding

    # The primitive's `out_channel` is given this layer's `in_channels`: it refers to the
    # out_channel of the Conv2D being transposed, not to this layer's output channels.
    self.conv2d_transpose = P.Conv2DTranspose(out_channel=in_channels,
                                              kernel_size=kernel_size,
                                              mode=1,
                                              pad_mode=pad_mode,
                                              pad=padding,
                                              stride=stride,
                                              dilation=dilation,
                                              group=group)
    self.bias_add = P.BiasAdd()

    # Expose the padding as four per-side values whether it was given as an int or a 4-tuple.
    per_side = (self.padding,) * 4 if isinstance(self.padding, int) else self.padding
    self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = per_side


def shard(self, strategy):
    """Forward the shard `strategy` to the wrapped Conv2DTranspose primitive and return `self`."""
    self.conv2d_transpose.shard(strategy)
    return self


def construct(self, x):
    """Apply the 2D transposed convolution to `x`.

    The output height and width are computed with `_deconv_output_length` and passed to the
    primitive as the target shape. The bias is added when `has_bias` is set, and finally the
    result is padded at the bottom/right by `output_padding` after its range has been checked.
    """
    n, _, h, w = self.shape(x)
    total_pad_h = self.padding_top + self.padding_bottom
    total_pad_w = self.padding_left + self.padding_right
    h_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, h, self.kernel_size[0],
                                  self.stride[0], self.dilation[0], total_pad_h)
    w_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, w, self.kernel_size[1],
                                  self.stride[1], self.dilation[1], total_pad_w)
    result = self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))
    if self.has_bias:
        result = self.bias_add(result, self.bias)

    # Exclusive upper bounds for output_padding along height and width.
    limit_h = max(self.dilation[0], self.stride[0])
    limit_w = max(self.dilation[1], self.stride[1])

    if isinstance(self.output_padding, tuple):
        # Tuple form: separate bottom and right amounts.
        if self.output_padding[0] < 0 or self.output_padding[0] >= limit_h:
            raise ValueError("output_padding[0] must be in range of [0, max(stride_h, dilation_h)).")
        if self.output_padding[1] < 0 or self.output_padding[1] >= limit_w:
            raise ValueError("output_padding[1] must be in range of [0, max(stride_w, dilation_w)).")
        if not self.is_pad and (self.output_padding[0] > 0 or self.output_padding[1] > 0):
            raise ValueError("when output_padding is not zero, pad_mode must be 'pad'")

        pad = P.Pad(paddings=((0, 0), (0, 0), (0, self.output_padding[0]), (0, self.output_padding[1])))
        return pad(result)

    # Int form: zero means there is nothing to pad.
    if self.output_padding == 0:
        return result

    if self.output_padding < 0 or self.output_padding >= limit_h:
        raise ValueError("output_padding must be in range of [0, max(stride_h, dilation_h)).")
    if self.output_padding < 0 or self.output_padding >= limit_w:
        raise ValueError("output_padding must be in range of [0, max(stride_w, dilation_w)).")
    if not self.is_pad and self.output_padding > 0:
        raise ValueError("when output_padding is not zero, pad_mode must be 'pad'")
    pad = P.Pad(paddings=((0, 0), (0, 0), (0, self.output_padding), (0, self.output_padding)))
    return pad(result)


@_primexpr
def _check_input_3d(input_shape, op_name):
    """Raise ValueError unless `input_shape` has exactly three dimensions."""
    is_3d = len(input_shape) == 3
    if not is_3d:
        raise ValueError(f"For '{op_name}', the dimension of input must be 3d, but got {len(input_shape)}.")
1329 1330 The input is typically of shape :math:`(N, C_{in}, L_{in})`, where :math:`N` is batch size, 1331 :math:`C_{in}` is a number of channels 1332 and :math:`L_{in}` is a length of sequence. 1333 1334 When Conv1d and ConvTranspose1d are initialized with the same parameters, and `pad_mode` is set to 'pad', 1335 :math:`dilation * (kernel\_size - 1) - padding` amount of zero will be paded to both sizes of input, 1336 they are inverses of each other in regard to the input and output shapes in this case. 1337 However, when `stride` > 1, Conv1d maps multiple input shapes to the same output shape. Deconvolutional network 1338 can refer to `Deconvolutional Networks <https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf>`_. 1339 1340 Args: 1341 in_channels (int): The channel number of the input tensor of the Conv1dTranspose layer. 1342 out_channels (int): The channel number of the output tensor of the Conv1dTranspose layer. 1343 kernel_size (int): Specifies the width of the 1D convolution kernel. 1344 stride (int): The movement stride of the 1D convolution kernel. Default: ``1`` . 1345 pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to: 1346 ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"same"`` . 1347 1348 - ``"same"``: Pad the input at the begin and end so that the shape of input and output 1349 are the same when `stride` is set to ``1``. 1350 The amount of padding to is calculated by the operator internally. If the amount is even, it is 1351 uniformly distributed around the input, if it is odd, the excess padding is goes to the right side. 1352 If this mode is set, `padding` must be 0. 1353 - ``"valid"``: No padding is applied to the input, and the output returns the maximum 1354 possible length. Extra pixels that could not complete a full stride will 1355 be discarded. If this mode is set, `padding` must be 0. 1356 - ``"pad"``: Pad the input with a specified amount. 
In this mode, the amount of padding 1357 at the begin and end is determined by the `padding` parameter. 1358 If this mode is set, `padding` must be greater than or equal to 0. 1359 1360 padding (int): The number of padding on both sides of input. 1361 The value should be greater than or equal to 0. Default: ``0`` . 1362 dilation (int): Dilation size of 1D convolution kernel. If :math:`k > 1`, the kernel is sampled 1363 every `k` elements. The value of `k` is in range of [1, L]. Default: ``1`` . 1364 group (int): Splits filter into groups, `in_channels` and `out_channels` must be 1365 divisible by `group`. When `group` > 1, the Ascend platform is not supported yet. Default: ``1`` . 1366 has_bias (bool): Whether the Conv1dTranspose layer has a bias parameter. Default: ``False``. 1367 weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. 1368 It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, 1369 values from ``'TruncatedNormal'`` , ``'Normal'`` , ``'Uniform'`` , ``'HeUniform'`` and ``'XavierUniform'`` 1370 distributions as well as constant ``'One'`` and ``'Zero'`` distributions are possible. Alias 1371 ``'xavier_uniform'`` , ``'he_uniform'``, ``'ones'`` and ``'zeros'`` are acceptable. Uppercase and lowercase 1372 are both acceptable. Refer to the values of Initializer for more details. Default: ``None`` , 1373 weight will be initialized using HeUniform. 1374 bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. 1375 Available initialization methods are the same as 'weight_init'. Refer to the values of 1376 Initializer for more details. Default: ``None`` , bias will be initialized using Uniform. 1377 dtype (:class:`mindspore.dtype`): Dtype of Parameters. Default: ``mstype.float32`` . 1378 1379 Inputs: 1380 - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, L_{in})`. 
1381 1382 Outputs: 1383 Tensor of shape :math:`(N, C_{out}, L_{out})`. 1384 1385 pad_mode is ``'same'``: :math:`L_{out} = \frac{ L_{in} + \text{stride} - 1 }{ \text{stride} }` 1386 1387 pad_mode is ``'valid'``: 1388 :math:`L_{out} = (L_{in} - 1) \times \text{stride} + \text{dilation} \times (\text{kernel_size} - 1) + 1` 1389 1390 pad_mode is ``'pad'``: 1391 :math:`L_{out} = (L_{in} - 1) \times \text{stride} - 2 \times \text{padding} 1392 + \text{dilation} \times (\text{kernel_size} - 1) + 1` 1393 1394 Raises: 1395 TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int. 1396 ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. 1397 ValueError: If `padding` is less than 0. 1398 ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 1399 1400 Supported Platforms: 1401 ``Ascend`` ``GPU`` ``CPU`` 1402 1403 Examples: 1404 >>> import mindspore 1405 >>> from mindspore import Tensor, nn 1406 >>> import numpy as np 1407 >>> net = nn.Conv1dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad') 1408 >>> x = Tensor(np.ones([1, 3, 50]), mindspore.float32) 1409 >>> output = net(x).shape 1410 >>> print(output) 1411 (1, 64, 53) 1412 """ 1413 1414 def __init__(self, 1415 in_channels, 1416 out_channels, 1417 kernel_size, 1418 stride=1, 1419 pad_mode='same', 1420 padding=0, 1421 dilation=1, 1422 group=1, 1423 has_bias=False, 1424 weight_init=None, 1425 bias_init=None, 1426 dtype=mstype.float32): 1427 """Initialize Conv1dTranspose.""" 1428 Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name) 1429 Validator.check_value_type("stride", stride, [int], self.cls_name) 1430 Validator.check_value_type("padding", padding, [int], self.cls_name) 1431 Validator.check_value_type("dilation", dilation, [int], self.cls_name) 1432 Validator.check_int(kernel_size, 1, Validator.GE, 'kernel_size', self.cls_name) 1433 Validator.check_int(stride, 1, 
Validator.GE, 'stride', self.cls_name) 1434 Validator.check_non_negative_int(padding, 'padding', self.cls_name) 1435 Validator.check_int(dilation, 1, Validator.GE, 'dilation', self.cls_name) 1436 kernel_size = (1, kernel_size) 1437 stride = (1, stride) 1438 dilation = (1, dilation) 1439 get_shape = P.Shape() 1440 get_dtype = P.DType() 1441 if isinstance(weight_init, Tensor): 1442 weight_init_shape = get_shape(weight_init) 1443 Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name) 1444 weight_init_dtype = get_dtype(weight_init) 1445 weight_init_value = weight_init.asnumpy() 1446 weight_init_value = np.expand_dims(weight_init_value, 2) 1447 weight_init = Tensor(weight_init_value, weight_init_dtype) 1448 # out_channels and in_channels swap. 1449 # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel, 1450 # then Conv1dTranspose's out_channel refers to Conv2DBackpropInput's in_channel. 1451 super(Conv1dTranspose, self).__init__( 1452 in_channels, 1453 out_channels, 1454 kernel_size, 1455 stride, 1456 pad_mode, 1457 padding, 1458 dilation, 1459 group, 1460 has_bias, 1461 weight_init, 1462 bias_init, 1463 transposed=True, 1464 dtype=dtype) 1465 self.padding = (0, 0, padding, padding) 1466 self.in_channels = in_channels 1467 self.out_channels = out_channels 1468 self.shape = P.Shape() 1469 Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name) 1470 self.is_valid = self.pad_mode == 'valid' 1471 self.is_same = self.pad_mode == 'same' 1472 self.is_pad = self.pad_mode == 'pad' 1473 1474 # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel. 
1475 self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels, 1476 kernel_size=kernel_size, 1477 mode=1, 1478 pad_mode=pad_mode, 1479 pad=self.padding, 1480 stride=stride, 1481 dilation=dilation, 1482 group=group) 1483 self.bias_add = P.BiasAdd() 1484 self.expand_dims = P.ExpandDims() 1485 self.squeeze = P.Squeeze(2) 1486 1487 def shard(self, strategy): 1488 self.conv2d_transpose.shard(strategy) 1489 return self 1490 1491 def construct(self, x): 1492 x_shape = self.shape(x) 1493 _check_input_3d(x_shape, self.cls_name) 1494 x = self.expand_dims(x, 2) 1495 n, _, h, w = self.shape(x) 1496 h_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, h, self.kernel_size[0], 1497 self.stride[0], self.dilation[0], self.padding[0] + self.padding[1]) 1498 w_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, w, self.kernel_size[1], 1499 self.stride[1], self.dilation[1], self.padding[2] + self.padding[3]) 1500 output = self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out)) 1501 if self.has_bias: 1502 output = self.bias_add(output, self.bias) 1503 1504 output = self.squeeze(output) 1505 return output 1506