# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""conv"""
import numpy as np
from mindspore import log as logger
from mindspore import context
from mindspore.ops import operations as P
from mindspore.ops.primitive import constexpr
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer
from mindspore.common.tensor import Tensor
from mindspore._checkparam import Validator, Rel, twice, _check_3d_int_or_tuple
from mindspore._extends import cell_attr_register
from ..cell import Cell

__all__ = ['Conv2d', 'Conv2dTranspose', 'Conv1d', 'Conv1dTranspose', 'Conv3d', 'Conv3dTranspose']


class _Conv(Cell):
    """
    Applies an N-D convolution over an input signal composed of several input planes.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 pad_mode,
                 padding,
                 dilation,
                 group,
                 has_bias,
                 weight_init,
                 bias_init,
                 data_format='NCHW',
                 transposed=False):
        """Initialize _Conv."""
        super(_Conv, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels, 'in_channels', self.cls_name)
        self.out_channels = Validator.check_positive_int(out_channels, 'out_channels', self.cls_name)
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad_mode = pad_mode
        self.weight_init = weight_init
        self.bias_init = bias_init
        self.format = Validator.check_string(data_format, ['NCHW', 'NHWC', 'NCDHW'], 'format', self.cls_name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError(f"For '{self.cls_name}', the \"NHWC\" format is only supported on the GPU target, "
                             f"but got format {self.format} on "
                             f"platform {context.get_context('device_target')}.")
        if context.get_context("device_target") == "CPU" and self.format == "NCDHW":
            raise ValueError(f"For '{self.cls_name}', the \"NCDHW\" format is only supported on Ascend and GPU "
                             f"targets, but got format {self.format} on "
                             f"platform {context.get_context('device_target')}.")
        if isinstance(padding, int):
            Validator.check_non_negative_int(padding, 'padding', self.cls_name)
            self.padding = padding
        elif isinstance(padding, tuple):
            for pad in padding:
                Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
            self.padding = padding
        else:
            raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int or tuple(int), "
                            f"but got {type(padding).__name__}.")

        self.dilation = dilation
        self.group = Validator.check_positive_int(group)
        self.has_bias = has_bias
        for kernel_size_elem in kernel_size:
            Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name)
        for stride_elem in stride:
            Validator.check_positive_int(stride_elem, 'stride item', self.cls_name)
        for dilation_elem in dilation:
            Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name)
        if in_channels % group != 0:
            raise ValueError(f"For '{self.cls_name}', the attr 'in_channels' must be divisible by attr 'group', "
                             f"but got 'in_channels': {in_channels} and 'group': {group}.")
        if out_channels % group != 0:
            raise ValueError(f"For '{self.cls_name}', the 'out_channels' must be divisible by attr 'group', "
                             f"but got 'out_channels': {out_channels} and 'group': {group}.")
        if transposed:
            shape = [in_channels, out_channels // group, *kernel_size]
        else:
            shape = [out_channels, *kernel_size, in_channels // group] if self.format == "NHWC" else \
                [out_channels, in_channels // group, *kernel_size]
        self.weight = Parameter(initializer(self.weight_init, shape), name='weight')

        if Validator.check_bool(has_bias, "has_bias", self.cls_name):
            self.bias = Parameter(initializer(self.bias_init, [out_channels]), name='bias')
        else:
            if self.bias_init != 'zeros':
                logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
            self.bias = None

    def construct(self, *inputs):
        """Must be overridden by all subclasses."""
        raise NotImplementedError


class Conv2d(_Conv):
    r"""
    2D convolution layer.

    Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
    where :math:`N` is batch size, :math:`C_{in}` is channel number, and :math:`H_{in}, W_{in}` are height and width.
    For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:

    .. math::

        out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,

    where :math:`ccor` is the cross-correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
    from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
    of kernel and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
    where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of
    the convolution kernel. The full kernel has shape
    :math:`(C_{out}, C_{in} // \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
    where group is the group number to split the input `x` in the channel dimension.

    If the 'pad_mode' is set to be "valid", the output height and width will be
    :math:`\left \lfloor{1 + \frac{H_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
    (\text{kernel_size[0]} - 1) \times (\text{dilation[0]} - 1) }{\text{stride[0]}}} \right \rfloor` and
    :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
    (\text{kernel_size[1]} - 1) \times (\text{dilation[1]} - 1) }{\text{stride[1]}}} \right \rfloor` respectively.

    The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the height
            and width of the 2D convolution window.
            Single int means the value is for both the height and the width of
            the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
            width of the kernel.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the height and width of movement are both strides, or a tuple of two int numbers that
            represent height and width of movement respectively. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are
            "same", "valid", "pad". Default: "same".

            - same: Adopts the way of completion. The height and width of the output will be the same as
              the input `x`. The total number of padding will be calculated in horizontal and vertical
              directions and evenly distributed to top and bottom, left and right if possible. Otherwise, the
              last extra padding will be done from the bottom and the right side. If this mode is set, `padding`
              must be 0.

            - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
              without padding. Extra pixels will be discarded. If this mode is set, `padding`
              must be 0.

            - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the
              input Tensor borders. `padding` must be greater than or equal to 0.

        padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input `x`. If `padding` is one
            integer, the paddings of top, bottom, left and right are the same, equal to padding. If `padding` is a
            tuple with four integers, the paddings of top, bottom, left and right will be equal to padding[0],
            padding[1], padding[2], and padding[3] accordingly. Default: 0.
        dilation (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the dilation rate
            to use for dilated convolution. If set to be :math:`k > 1`, there will
            be :math:`k - 1` pixels skipped for each sampling location. Its value must
            be greater than or equal to 1 and bounded by the height and width of the
            input `x`. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. If the group is equal to `in_channels` and `out_channels`,
            this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.
        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'.
            Default: 'NCHW'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`
          or :math:`(N, H_{in}, W_{in}, C_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 4.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).
        ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.Conv2d(120, 240, 4, has_bias=False, weight_init='normal')
        >>> x = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32)
        >>> output = net(x).shape
        >>> print(output)
        (1, 240, 1024, 640)
    """

    @cell_attr_register
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 data_format='NCHW'):
        """Initialize Conv2d."""
        kernel_size = twice(kernel_size)
        stride = twice(stride)
        self._dilation = dilation
        dilation = twice(dilation)
        super(Conv2d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            data_format)
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group,
                               data_format=self.format)
        self.bias_add = P.BiasAdd(data_format=self.format)

    def construct(self, x):
        output = self.conv2d(x, self.weight)
        if self.has_bias:
            output = self.bias_add(output, self.bias)
        return output

    def extend_repr(self):
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}, format={}'.format(
                self.in_channels,
                self.out_channels,
                self.kernel_size,
                self.stride,
                self.pad_mode,
                self.padding,
                self.dilation,
                self.group,
                self.has_bias,
                self.weight_init,
                self.bias_init,
                self.format)
        return s


@constexpr
def _check_input_3d(input_shape, op_name):
    if len(input_shape) != 3:
        raise ValueError(f"For '{op_name}', the dimension of input should be 3d, but got {len(input_shape)}.")


class Conv1d(_Conv):
    r"""
    1D convolution layer.

    Applies a 1D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, W_{in})`,
    where :math:`N` is batch size and :math:`C_{in}` is channel number. For each batch of shape
    :math:`(C_{in}, W_{in})`, the formula is defined as:

    .. math::

        out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,

    where :math:`ccor` is the cross-correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
    from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
    of kernel and it has shape :math:`(\text{ks_w})`, where :math:`\text{ks_w}` is the width of the convolution
    kernel. The full kernel has shape :math:`(C_{out}, C_{in} // \text{group}, \text{ks_w})`, where group is the
    group number to split the input `x` in the channel dimension.

    If the 'pad_mode' is set to be "valid", the output width will be
    :math:`\left \lfloor{1 + \frac{W_{in} + 2 \times \text{padding} - \text{ks_w} -
    (\text{ks_w} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor`.

    The first introduction of convolution layer can be found in paper `Gradient Based Learning Applied to Document
    Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (int): The data type is int. Specifies the
            width of the 1D convolution window.
        stride (int): The distance of kernel moving, an int number that represents
            the width of movement. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are
            "same", "valid", "pad". Default: "same".

            - same: Adopts the way of completion. The output width will be the same as the input `x`.
              The total number of padding will be calculated in the horizontal
              direction and evenly distributed to left and right if possible. Otherwise, the
              last extra padding will be done from the right side. If this mode is set, `padding`
              must be 0.

            - valid: Adopts the way of discarding. The possible largest width of the output will be returned
              without padding. Extra pixels will be discarded. If this mode is set, `padding`
              must be 0.

            - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the
              input Tensor borders. `padding` must be greater than or equal to 0.

        padding (int): Implicit paddings on both sides of the input `x`. Default: 0.
        dilation (int): The data type is int. Specifies the dilation rate
            to use for dilated convolution. If set to be :math:`k > 1`, there will
            be :math:`k - 1` pixels skipped for each sampling location. Its value must
            be greater than or equal to 1 and bounded by the width of the
            input `x`. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): An initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable.
            Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, W_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.Conv1d(120, 240, 4, has_bias=False, weight_init='normal')
        >>> x = Tensor(np.ones([1, 120, 640]), mindspore.float32)
        >>> output = net(x).shape
        >>> print(output)
        (1, 240, 640)
    """

    @cell_attr_register
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        """Initialize Conv1d."""
        Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', self.cls_name)
        Validator.check_int(stride, 1, Rel.GE, 'stride', self.cls_name)
        Validator.check_non_negative_int(padding, 'padding', self.cls_name)
        Validator.check_int(dilation, 1, Rel.GE, 'dilation', self.cls_name)
        # Conv1d reuses the 2D convolution primitive: the 1D kernel, stride and
        # dilation are lifted to 2D with a dummy height dimension of size 1.
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        if isinstance(weight_init, Tensor):
            weight_init_shape = get_shape(weight_init)
            Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)

        super(Conv1d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init)
        self.padding = (0, 0, padding, padding)
        Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group)
        self.bias_add = P.BiasAdd()
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
        self.shape = P.Shape()

    def construct(self, x):
        x_shape = self.shape(x)
        _check_input_3d(x_shape, self.cls_name)
        # Insert a dummy height axis, run the 2D convolution, then remove it again.
        x = self.expand_dims(x, 2)
        output = self.conv2d(x, self.weight)
        if self.has_bias:
            output = self.bias_add(output, self.bias)

        output = self.squeeze(output)
        return output

    def extend_repr(self):
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}'.format(
                self.in_channels,
                self.out_channels,
                self.kernel_size,
                self.stride,
                self.pad_mode,
                self.padding,
                self.dilation,
                self.group,
                self.has_bias,
                self.weight_init,
                self.bias_init)
        return s


@constexpr
def _check_input_5dims(input_shape, op_name):
    if len(input_shape) != 5:
        raise ValueError(f"For '{op_name}', the dimension of input should be 5d, but got {len(input_shape)}.")


class Conv3d(_Conv):
    r"""
    3D convolution layer.

    Applies a 3D convolution over an input tensor which is typically of shape
    :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and outputs a tensor of shape
    :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, where :math:`N` is batch size and :math:`C` is channel number.
    The formula is defined as:

    .. math::

        \operatorname{out}\left(N_{i}, C_{\text {out}_j}\right)=\operatorname{bias}\left(C_{\text {out}_j}\right)+
        \sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right),
        \operatorname{input}\left(N_{i}, k\right))

    where :math:`ccor` is the cross-correlation operator.

    If the 'pad_mode' is set to be "valid", the output depth, height and width will be
    :math:`\left \lfloor{1 + \frac{D_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
    (\text{kernel_size[0]} - 1) \times (\text{dilation[0]} - 1) }{\text{stride[0]}}} \right \rfloor`,
    :math:`\left \lfloor{1 + \frac{H_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
    (\text{kernel_size[1]} - 1) \times (\text{dilation[1]} - 1) }{\text{stride[1]}}} \right \rfloor` and
    :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[4]} + \text{padding[5]} - \text{kernel_size[2]} -
    (\text{kernel_size[2]} - 1) \times (\text{dilation[2]} - 1) }{\text{stride[2]}}} \right \rfloor` respectively.

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers.
            Specifies the depth, height and width of the 3D convolution window.
            Single int means the value is for the depth, height and the width of the kernel.
            A tuple of 3 ints means the first value is for the depth, the second value is for the height and the
            other is for the width of the kernel.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the depth, height and width of movement are both strides, or a tuple of three int numbers that
            represent depth, height and width of movement respectively. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are
            "same", "valid", "pad". Default: "same".

            - same: Adopts the way of completion. The depth, height and width of the output will be the same as
              the input `x`. The total number of padding will be calculated in depth, horizontal and vertical
              directions and evenly distributed to head and tail, top and bottom, left and right if possible.
              Otherwise, the last extra padding will be done from the tail, bottom and the right side.
              If this mode is set, `padding` must be 0.

            - valid: Adopts the way of discarding.
              The possible largest depth, height and width of output
              will be returned without padding. Extra pixels will be discarded. If this mode is set, `padding`
              must be 0.

            - pad: Implicit paddings on both sides of the input `x` in depth, height and width. The number of
              `padding` will be padded to the input Tensor borders. `padding` must be greater than or equal to 0.

        padding (Union(int, tuple[int])): Implicit paddings on both sides of the input `x`.
            The data type is int or a tuple of 6 integers. Default: 0. If `padding` is an integer,
            the paddings of head, tail, top, bottom, left and right are the same, equal to padding.
            If `padding` is a tuple of six integers, the paddings of head, tail, top, bottom, left and right equal
            padding[0], padding[1], padding[2], padding[3], padding[4] and padding[5] correspondingly.
        dilation (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers
            :math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1.
            Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
            there will be :math:`k - 1` pixels skipped for each sampling location.
            Its value must be greater than or equal to 1 and bounded by the height and width of the input `x`.
            Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. Default: 1. Only 1 is currently supported.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.
        data_format (str): The optional value for data format. Currently only 'NCDHW' is supported.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
          Currently the input data type only supports float16 and float32.

    Outputs:
        Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
        ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 6.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0).
        ValueError: If `data_format` is not 'NCDHW'.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float32)
        >>> conv3d = nn.Conv3d(in_channels=3, out_channels=32, kernel_size=(4, 3, 3))
        >>> output = conv3d(x)
        >>> print(output.shape)
        (16, 32, 10, 32, 32)
    """

    @cell_attr_register
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 data_format='NCDHW'):
        """Initialize Conv3d."""
        kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name)
        stride = _check_3d_int_or_tuple("stride", stride, self.cls_name)
        dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name)
        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
        if isinstance(padding, tuple):
            Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name)
        super(Conv3d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            data_format)
        self.conv3d = P.Conv3D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group,
                               data_format=self.format)
        self.bias_add = P.BiasAdd(data_format=self.format)
        self.shape = P.Shape()

    def construct(self, x):
        x_shape = self.shape(x)
        _check_input_5dims(x_shape, self.cls_name)
        output = self.conv3d(x, self.weight)
        if self.has_bias:
            output = self.bias_add(output, self.bias)
        return output

    def extend_repr(self):
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}, format={}'.format(
                self.in_channels,
                self.out_channels,
                self.kernel_size,
                self.stride,
                self.pad_mode,
                self.padding,
                self.dilation,
                self.group,
                self.has_bias,
                self.weight_init,
                self.bias_init,
                self.format)
        return s


class Conv3dTranspose(_Conv):
    r"""
    Compute a 3D transposed convolution, which is also known as a deconvolution
    (although it is not an actual deconvolution).
    The transposed convolution operator multiplies each input value element-wise by a learnable kernel,
    and sums over the outputs from all input feature planes.
    This module can be seen as the gradient of Conv3d with respect to its input.

    `x` is typically of shape :math:`(N, C, D, H, W)`, where :math:`N` is batch size, :math:`C` is channel number,
    :math:`D` is the characteristic depth, :math:`H` is the height of the characteristic layer,
    and :math:`W` is the width of the characteristic layer.
    The calculation process of transposed convolution is equivalent to the reverse calculation of convolution.

    The pad_mode argument effectively adds :math:`dilation * (kernel\_size - 1) - padding` amount of zero padding
    to both sides of the input, so that when a Conv3d and a Conv3dTranspose are initialized with the same
    parameters, they are inverses of each other in regard to the input and output shapes.
    However, when stride > 1, Conv3d maps multiple input shapes to the same output shape.
    Conv3dTranspose provides a padding argument to increase the calculated output shape on one or more sides.

    The depth, height and width of output are defined as:

    if the 'pad_mode' is set to be "pad",

    .. math::

        D_{out} = (D_{in} - 1) \times \text{stride_d} - 2 \times \text{padding_d} + \text{dilation_d} \times
        (\text{kernel_size_d} - 1) + \text{output_padding_d} + 1

        H_{out} = (H_{in} - 1) \times \text{stride_h} - 2 \times \text{padding_h} + \text{dilation_h} \times
        (\text{kernel_size_h} - 1) + \text{output_padding_h} + 1

        W_{out} = (W_{in} - 1) \times \text{stride_w} - 2 \times \text{padding_w} + \text{dilation_w} \times
        (\text{kernel_size_w} - 1) + \text{output_padding_w} + 1

    if the 'pad_mode' is set to be "same",

    .. math::

        D_{out} = (D_{in} + \text{stride_d} - 1)/\text{stride_d} \\
        H_{out} = (H_{in} + \text{stride_h} - 1)/\text{stride_h} \\
        W_{out} = (W_{in} + \text{stride_w} - 1)/\text{stride_w}

    if the 'pad_mode' is set to be "valid",

    .. math::

        D_{out} = (D_{in} - 1) \times \text{stride_d} + \text{dilation_d} \times
        (\text{kernel_size_d} - 1) + 1 \\
        H_{out} = (H_{in} - 1) \times \text{stride_h} + \text{dilation_h} \times
        (\text{kernel_size_h} - 1) + 1 \\
        W_{out} = (W_{in} - 1) \times \text{stride_w} + \text{dilation_w} \times
        (\text{kernel_size_w} - 1) + 1

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple[int]]): The kernel size of the 3D convolution.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the depth, height and width of movement are both strides, or a tuple of three int numbers that
            represent depth, height and width of movement respectively. Its value must be equal to or greater than 1.
            Default: 1.
        pad_mode (str): Select the mode of the pad. The optional values are
            "pad", "same", "valid". Default: "same".

            - same: Adopts the way of completion. The depth, height and width of the output will be the same as
              the input `x`. The total number of padding will be calculated in depth, horizontal and vertical
              directions and evenly distributed to head and tail, top and bottom, left and right if possible.
              Otherwise, the last extra padding will be done from the tail, bottom and the right side.
              If this mode is set, `padding` and `output_padding` must be 0.

            - valid: Adopts the way of discarding. The possible largest depth, height and width of output
              will be returned without padding. Extra pixels will be discarded. If this mode is set, `padding`
              and `output_padding` must be 0.

            - pad: Implicit paddings on both sides of the input `x` in depth, height and width. The number of `pad`
              will be padded to the input Tensor borders. `padding` must be greater than or equal to 0.

        padding (Union(int, tuple[int])): The pad value to be filled. Default: 0. If `padding` is an integer,
            the paddings of head, tail, top, bottom, left and right are the same, equal to padding.
            If `padding` is a tuple of six integers, the paddings of head, tail, top, bottom, left and right equal
            padding[0], padding[1], padding[2], padding[3], padding[4] and padding[5] correspondingly.
        dilation (Union(int, tuple[int])): The data type is int or a tuple of 3 integers
            :math:`(dilation_d, dilation_h, dilation_w)`.
            Currently, dilation on depth only supports the case of 1.
            Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
            there will be :math:`k - 1` pixels skipped for each sampling location.
            Its value must be greater than or equal to 1 and bounded by the height and width of the input `x`.
            Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. Default: 1. Only 1 is currently supported.
        output_padding (Union(int, tuple[int])): Add extra size to each dimension of the output. Default: 0.
            Must be greater than or equal to 0.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.
        data_format (str): The optional value for data format. Currently only 'NCDHW' is supported.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
          Currently the input data type only supports float16 and float32.

    Outputs:
        Tensor, the shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding`, `dilation` or `output_padding`
            is neither an int nor a tuple of three.
        TypeError: If input data type is not float16 or float32.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 6.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0).
        ValueError: If `data_format` is not 'NCDHW'.

    Examples:
        >>> x = Tensor(np.ones([32, 16, 10, 32, 32]), mindspore.float32)
        >>> conv3d_transpose = nn.Conv3dTranspose(in_channels=16, out_channels=3, kernel_size=(4, 6, 2),
        ...                                        pad_mode='pad')
        >>> output = conv3d_transpose(x)
        >>> print(output.shape)
        (32, 3, 13, 37, 33)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 output_padding=0,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 data_format='NCDHW'):
        """Initialize Conv3dTranspose."""
        kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name)
        stride = _check_3d_int_or_tuple("stride", stride, self.cls_name)
        dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name)
        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
        if isinstance(padding, tuple):
            Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name)
        output_padding = _check_3d_int_or_tuple("output_padding", output_padding, self.cls_name, greater_zero=False)
        super(Conv3dTranspose, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            data_format,
            transposed=True)
        self.conv3d_transpose = P.Conv3DTranspose(in_channel=self.in_channels,
                                                  out_channel=self.out_channels,
                                                  kernel_size=self.kernel_size,
                                                  mode=1,
                                                  pad_mode=self.pad_mode,
                                                  pad=self.padding,
                                                  stride=self.stride,
                                                  dilation=self.dilation,
                                                  group=self.group,
                                                  output_padding=output_padding,
                                                  data_format=self.format)
        self.bias_add = P.BiasAdd(data_format=self.format)
        self.shape = P.Shape()

    def construct(self, x):
        x_shape = self.shape(x)
        _check_input_5dims(x_shape, self.cls_name)
        output = self.conv3d_transpose(x, self.weight)
        if self.has_bias:
            output = self.bias_add(output, self.bias)
        return output

    def extend_repr(self):
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}'.format(self.in_channels,
                                                  self.out_channels,
                                                  self.kernel_size,
                                                  self.stride,
                                                  self.pad_mode,
                                                  self.padding,
                                                  self.dilation,
                                                  self.group,
                                                  self.has_bias,
                                                  self.weight_init,
                                                  self.bias_init)
        return s


def _deconv_output_length(is_valid, is_same, is_pad, input_length, filter_size, stride_size, dilation_size, padding):
    """Calculate the width and height of output."""
    length = 0
    # Effective filter size after dilation.
    filter_size = filter_size + (filter_size - 1) * (dilation_size - 1)
    if is_valid:
        if filter_size - stride_size > 0:
            length = input_length * stride_size + filter_size - stride_size
        else:
            length = input_length * stride_size
    elif is_same:
        length = input_length * stride_size
    elif is_pad:
        length = input_length * stride_size - padding + filter_size - stride_size

    return length


class Conv2dTranspose(_Conv):
    r"""
    2D transposed convolution layer.

    Compute a 2D transposed convolution, which is also known as a deconvolution
    (although it is not an actual deconvolution).
    This module can be seen as the gradient of Conv2d with respect to its input.

    `x` is typically of shape :math:`(N, C, H, W)`, where :math:`N` is batch size, :math:`C` is channel number,
    :math:`H` is the height of the characteristic layer and :math:`W` is the width of the characteristic layer.

    The pad_mode argument effectively adds :math:`dilation * (kernel\_size - 1) - padding` amount of zero padding
    to both sides of the input, so that when a Conv2d and a Conv2dTranspose are initialized with the same
    parameters, they are inverses of each other in regard to the input and output shapes.
    However, when stride > 1, Conv2d maps multiple input shapes to the same output shape.
    Conv2dTranspose provides a padding argument to increase the calculated output shape on one or more sides.

    The height and width of output are defined as:

    if the 'pad_mode' is set to be "pad",

    .. math::

        H_{out} = (H_{in} - 1) \times \text{stride[0]} - \left (\text{padding[0]} + \text{padding[1]}\right ) +
        \text{dilation[0]} \times (\text{kernel_size[0]} - 1) + 1

        W_{out} = (W_{in} - 1) \times \text{stride[1]} - \left (\text{padding[2]} + \text{padding[3]}\right ) +
        \text{dilation[1]} \times (\text{kernel_size[1]} - 1) + 1

    if the 'pad_mode' is set to be "same",

    .. math::

        H_{out} = (H_{in} + \text{stride[0]} - 1)/\text{stride[0]} \\
        W_{out} = (W_{in} + \text{stride[1]} - 1)/\text{stride[1]}

    if the 'pad_mode' is set to be "valid",

    .. math::

        H_{out} = (H_{in} - 1) \times \text{stride[0]} + \text{dilation[0]} \times
        (\text{kernel_size[0]} - 1) + 1 \\
        W_{out} = (W_{in} - 1) \times \text{stride[1]} + \text{dilation[1]} \times
        (\text{kernel_size[1]} - 1) + 1

    where :math:`\text{kernel_size[0]}` is the height of the convolution kernel and :math:`\text{kernel_size[1]}`
    is the width of the convolution kernel.

    Args:
        in_channels (int): The number of channels in the input space.
        out_channels (int): The number of channels in the output space.
        kernel_size (Union[int, tuple]): int or a tuple of 2 integers, which specifies the height
            and width of the 2D convolution window. Single int means the value is for both the height and the width
            of the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
            width of the kernel.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the height and width of movement are both strides, or a tuple of two int numbers that
            represent height and width of movement respectively. Its value must be equal to or greater than 1.
            Default: 1.
        pad_mode (str): Select the mode of the pad. The optional values are
            "pad", "same", "valid". Default: "same".

            - pad: Implicit paddings on both sides of the input `x`.

            - same: Adopts the way of completion.

            - valid: Adopts the way of discarding.

        padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input `x`. If `padding` is one
            integer, the paddings of top, bottom, left and right are the same, equal to padding. If `padding` is a
            tuple with four integers, the paddings of top, bottom, left and right will be equal to padding[0],
            padding[1], padding[2], and padding[3] accordingly. Default: 0.
        dilation (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the dilation rate
            to use for dilated convolution. If set to be :math:`k > 1`, there will
            be :math:`k - 1` pixels skipped for each sampling location. Its value must
            be greater than or equal to 1 and bounded by the height and width of the
            input `x`. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. This is not supported on Davinci devices when group > 1. Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 4.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.Conv2dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
        >>> x = Tensor(np.ones([1, 3, 16, 50]), mindspore.float32)
        >>> output = net(x).shape
        >>> print(output)
        (1, 64, 19, 53)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        """Initialize Conv2dTranspose."""
        kernel_size = twice(kernel_size)
        stride = twice(stride)
        dilation = twice(dilation)
        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
        if isinstance(padding, tuple):
            Validator.check_equal_int(len(padding), 4, 'padding size', self.cls_name)
        # out_channels and in_channels swap,
        # because Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # then Conv2dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
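        # As a result of the swap, the weight is created in the transposed layout
        # (in_channels, out_channels // group, *kernel_size); see the
        # `transposed=True` branch of _Conv.__init__ above.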
        super(Conv2dTranspose, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            transposed=True)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        if Validator.check_bool(has_bias, "has_bias", self.cls_name):
            self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')

        # because Conv2DTranspose's out_channel refers to Conv2D's out_channel.
        self.conv2d_transpose = P.Conv2DTranspose(out_channel=in_channels,
                                                  kernel_size=kernel_size,
                                                  mode=1,
                                                  pad_mode=pad_mode,
                                                  pad=padding,
                                                  stride=stride,
                                                  dilation=dilation,
                                                  group=group)
        self.bias_add = P.BiasAdd()
        if isinstance(self.padding, int):
            self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = (self.padding,) * 4
        else:
            self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = self.padding

    def shard(self, strategy):
        self.conv2d_transpose.shard(strategy)
        return self

    def construct(self, x):
        n, _, h, w = self.shape(x)
        h_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, h, self.kernel_size[0],
                                      self.stride[0], self.dilation[0], self.padding_top + self.padding_bottom)
        w_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, w, self.kernel_size[1],
                                      self.stride[1], self.dilation[1], self.padding_left + self.padding_right)
        if self.has_bias:
            return self.bias_add(self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out)),
                                 self.bias)
        return self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))

    def extend_repr(self):
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}'.format(self.in_channels,
                                                  self.out_channels,
                                                  self.kernel_size,
                                                  self.stride,
                                                  self.pad_mode,
                                                  self.padding,
                                                  self.dilation,
                                                  self.group,
                                                  self.has_bias,
                                                  self.weight_init,
                                                  self.bias_init)
        return s


class Conv1dTranspose(_Conv):
    r"""
    1D transposed convolution layer.

    Compute a 1D transposed convolution, which is also known as a deconvolution
    (although it is not an actual deconvolution).
    This module can be seen as the gradient of Conv1d with respect to its input.

    `x` is typically of shape :math:`(N, C, W)`, where :math:`N` is batch size, :math:`C` is channel number and
    :math:`W` is the characteristic length.

    The padding argument effectively adds :math:`dilation * (kernel\_size - 1) - padding` amount of zero padding to
    both sides of the input, so that when a Conv1d and a Conv1dTranspose are initialized with the same parameters,
    they are inverses of each other in regard to the input and output shapes. However, when stride > 1,
    Conv1d maps multiple input shapes to the same output shape.

    The width of output is defined as:

    .. math::

        W_{out} = \begin{cases}
            (W_{in} - 1) \times \text{stride} - 2 \times \text{padding} + \text{dilation} \times
            (\text{ks_w} - 1) + 1, & \text{if pad_mode='pad'}\\
            (W_{in} + \text{stride} - 1)/\text{stride}, & \text{if pad_mode='same'}\\
            (W_{in} - 1) \times \text{stride} + \text{dilation} \times
            (\text{ks_w} - 1) + 1, & \text{if pad_mode='valid'}
        \end{cases}

    where :math:`\text{ks_w}` is the width of the convolution kernel.

    Args:
        in_channels (int): The number of channels in the input space.
        out_channels (int): The number of channels in the output space.
        kernel_size (int): int, which specifies the width of the 1D convolution window.
        stride (int): The distance of kernel moving, an int number that represents
            the width of movement. Default: 1.
        pad_mode (str): Select the mode of the pad. The optional values are
            "pad", "same", "valid". Default: "same".

            - pad: Implicit paddings on both sides of the input `x`.

            - same: Adopts the way of completion.

            - valid: Adopts the way of discarding.

        padding (int): Implicit paddings on both sides of the input `x`. Default: 0.
        dilation (int): The data type is int. Specifies the dilation rate
            to use for dilated convolution. If set to be :math:`k > 1`, there will
            be :math:`k - 1` pixels skipped for each sampling location. Its value must
            be greater than or equal to 1 and bounded by the width of the
            input `x`. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. This is not supported on Davinci devices when group > 1. Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, W_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.Conv1dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
        >>> x = Tensor(np.ones([1, 3, 50]), mindspore.float32)
        >>> output = net(x).shape
        >>> print(output)
        (1, 64, 53)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        """Initialize Conv1dTranspose."""
        Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', self.cls_name)
        Validator.check_int(stride, 1, Rel.GE, 'stride', self.cls_name)
        Validator.check_non_negative_int(padding, 'padding', self.cls_name)
        Validator.check_int(dilation, 1, Rel.GE, 'dilation', self.cls_name)
        # Lift the 1D parameters to 2D with a dummy height dimension of size 1,
        # mirroring Conv1d, so that the 2D backprop-input primitive can be reused.
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        if isinstance(weight_init, Tensor):
            weight_init_shape = get_shape(weight_init)
            Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)
        # out_channels and in_channels swap,
        # because Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # then Conv1dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
        super(Conv1dTranspose, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            transposed=True)
        self.padding = (0, 0, padding, padding)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        if Validator.check_bool(has_bias, "has_bias", self.cls_name):
            self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')

        # because Conv2DBackpropInput's out_channel refers to Conv2D's out_channel.
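        # The 1D transposed convolution is computed as the input gradient of a 2D
        # convolution (Conv2DBackpropInput) applied to the input after a dummy
        # height axis is inserted in construct(); that axis is squeezed out afterwards.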
        self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                      kernel_size=kernel_size,
                                                      mode=1,
                                                      pad_mode=pad_mode,
                                                      pad=self.padding,
                                                      stride=stride,
                                                      dilation=dilation,
                                                      group=group)
        self.bias_add = P.BiasAdd()
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)

    def shard(self, strategy):
        self.conv2d_transpose.shard(strategy)
        return self

    def construct(self, x):
        x_shape = self.shape(x)
        _check_input_3d(x_shape, self.cls_name)
        x = self.expand_dims(x, 2)

        n, _, h, w = self.shape(x)

        h_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, h, self.kernel_size[0],
                                      self.stride[0], self.dilation[0], self.padding[0] + self.padding[1])
        w_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, w, self.kernel_size[1],
                                      self.stride[1], self.dilation[1], self.padding[2] + self.padding[3])
        output = self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))
        if self.has_bias:
            output = self.bias_add(output, self.bias)

        output = self.squeeze(output)
        return output

    def extend_repr(self):
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}'.format(self.in_channels,
                                                  self.out_channels,
                                                  self.kernel_size,
                                                  self.stride,
                                                  self.pad_mode,
                                                  self.padding,
                                                  self.dilation,
                                                  self.group,
                                                  self.has_bias,
                                                  self.weight_init,
                                                  self.bias_init)
        return s
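

# Illustrative check, not part of the public API: a minimal sketch tying the
# docstring output-size formulas for the transposed layers to the helper
# `_deconv_output_length` above. With pad_mode='pad', padding=0, kernel_size=4,
# stride=1 and dilation=1, the formula (W_in - 1)*stride - padding_total +
# dilation*(kernel_size - 1) + 1 gives 53 for W_in = 50, matching the
# Conv1dTranspose docstring example. Guarded so it never runs on import.
if __name__ == "__main__":
    w_out = _deconv_output_length(False, False, True, 50, 4, 1, 1, 0)
    assert w_out == (50 - 1) * 1 - 0 + 1 * (4 - 1) + 1 == 53
    print("pad-mode output width for W_in=50, kernel_size=4, stride=1:", w_out)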