# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""pooling"""
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore._checkparam import Rel, Validator as validator
from mindspore.ops.primitive import constexpr
import mindspore.context as context
from ..cell import Cell

__all__ = ['AvgPool2d', 'MaxPool2d', 'AvgPool1d', 'MaxPool1d']


class _PoolNd(Cell):
    """Base class of the pooling layers; validates and stores the shared pooling arguments."""

    def __init__(self, kernel_size, stride, pad_mode, data_format="NCHW"):
        """
        Initialize _PoolNd.

        Args:
            kernel_size (Union[int, tuple[int]]): A positive int or a tuple of two positive ints.
            stride (Union[int, tuple[int]]): A positive int or a tuple of two positive ints.
            pad_mode (str): "same" or "valid", not case sensitive.
            data_format (str): 'NCHW' or 'NHWC'. Default: 'NCHW'.

        Raises:
            ValueError: If `data_format` is 'NHWC' and the device target is not GPU, or if
                `kernel_size` / `stride` is not a positive int or a pair of positive ints.
        """
        super(_PoolNd, self).__init__()
        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.cls_name)
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError(f"For '{self.cls_name}', the 'NHWC' format only support in GPU target, but got device "
                             f"target {context.get_context('device_target')}.")

        def _check_int_or_tuple(arg_name, arg_value):
            """Return `arg_value` when it is a positive int or a tuple of two positive ints."""
            validator.check_value_type(arg_name, arg_value, [int, tuple], self.cls_name)
            error_msg = f"For '{self.cls_name}', the '{arg_name}' should be an positive int number or " \
                        f"a tuple of two positive int numbers, but got {arg_value}"
            if isinstance(arg_value, int):
                if arg_value <= 0:
                    raise ValueError(error_msg)
            elif len(arg_value) == 2:
                if not all(isinstance(item, int) and item > 0 for item in arg_value):
                    raise ValueError(error_msg)
            else:
                # A tuple whose length is not exactly two.
                raise ValueError(error_msg)
            return arg_value

        self.kernel_size = _check_int_or_tuple('kernel_size', kernel_size)
        self.stride = _check_int_or_tuple('stride', stride)

    def construct(self, *inputs):
        """Placeholder; every concrete pooling layer in this file overrides it."""
        pass

    def extend_repr(self):
        """Return the kernel size, stride and pad mode formatted for the layer's repr."""
        return 'kernel_size={kernel_size}, stride={stride}, pad_mode={pad_mode}'.format(**self.__dict__)


@constexpr
def _shape_check(in_shape, prim_name=None):
    """Raise ValueError unless `in_shape` has exactly three dimensions."""
    msg_prefix = f"For '{prim_name}', the" if prim_name else "The"
    if len(in_shape) != 3:
        raise ValueError(f"{msg_prefix} input must has 3 dim, but got {len(in_shape)}")


def _check_1d_pool_args(kernel_size, stride, prim_name):
    """
    Validate the `kernel_size` and `stride` of a 1D pooling layer.

    Both must be ints no less than 1. The type check runs first so that a tuple is rejected
    here instead of being accepted by the 2D-oriented validation in `_PoolNd`.
    """
    validator.check_value_type('kernel_size', kernel_size, [int], prim_name)
    validator.check_value_type('stride', stride, [int], prim_name)
    validator.check_int(kernel_size, 1, Rel.GE, "kernel_size", prim_name)
    validator.check_int(stride, 1, Rel.GE, "stride", prim_name)


class MaxPool2d(_PoolNd):
    r"""
    2D max pooling operation for temporal data.

    Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes.

    Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool2d outputs
    regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
    :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.

    .. math::
        \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)

    Note:
        pad_mode for training only supports "same" and "valid".

    Args:
        kernel_size (Union[int, tuple[int]]): The size of kernel used to take the max value,
            is an int number that represents height and width are both kernel_size,
            or a tuple of two int numbers that represent height and width respectively.
            Default: 1.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the height and width of movement are both strides, or a tuple of two int numbers that
            represent height and width of movement respectively. Default: 1.
        pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
            Default: "valid".

            - same: Adopts the way of completion. The height and width of the output will be the same as
              the input. The total number of padding will be calculated in horizontal and vertical
              directions and evenly distributed to top and bottom, left and right if possible.
              Otherwise, the last extra padding will be done from the bottom and the right side.

            - valid: Adopts the way of discarding. The possible largest height and width of output
              will be returned without padding. Extra pixels will be discarded.
        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'.
            Default: 'NCHW'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `kernel_size` or `stride` is neither int nor tuple.
        ValueError: If `pad_mode` is neither 'valid' nor 'same' with not case sensitive.
        ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.
        ValueError: If `kernel_size` or `stride` is less than 1.
        ValueError: If length of shape of `x` is not equal to 4.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> pool = nn.MaxPool2d(kernel_size=3, stride=1)
        >>> x = Tensor(np.random.randint(0, 10, [1, 2, 4, 4]), mindspore.float32)
        >>> output = pool(x)
        >>> print(output.shape)
        (1, 2, 2, 2)
    """

    def __init__(self, kernel_size=1, stride=1, pad_mode="valid", data_format="NCHW"):
        """Initialize MaxPool2d."""
        super(MaxPool2d, self).__init__(kernel_size, stride, pad_mode, data_format)
        self.max_pool = P.MaxPool(kernel_size=self.kernel_size,
                                  strides=self.stride,
                                  pad_mode=self.pad_mode,
                                  data_format=self.format)

    def construct(self, x):
        """Apply the max pooling primitive to `x`."""
        out = self.max_pool(x)
        return out


class MaxPool1d(_PoolNd):
    r"""
    1D max pooling operation for temporal data.

    Applies a 1D max pooling over an input Tensor which can be regarded as a composition of 1D planes.

    Typically the input is of shape :math:`(N_{in}, C_{in}, L_{in})`, MaxPool1d outputs
    regional maximum in the :math:`(L_{in})`-dimension. Given kernel size
    :math:`ks = (l_{ker})` and stride :math:`s = (s_0)`, the operation is as follows.

    .. math::
        \text{output}(N_i, C_j, l) = \max_{n=0, \ldots, l_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times l + n)

    Note:
        pad_mode for training only supports "same" and "valid".

    Args:
        kernel_size (int): The size of kernel used to take the max value, Default: 1.
        stride (int): The distance of kernel moving, an int number that represents
            the width of movement is stride, Default: 1.
        pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
            Default: "valid".

            - same: Adopts the way of completion. The total number of padding will be calculated in horizontal
              and vertical directions and evenly distributed to top and bottom, left and right if possible.
              Otherwise, the last extra padding will be done from the bottom and the right side.

            - valid: Adopts the way of discarding. The possible largest height and width of output
              will be returned without padding. Extra pixels will be discarded.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C, L_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C, L_{out})`.

    Raises:
        TypeError: If `kernel_size` or `stride` is not an int.
        ValueError: If `pad_mode` is neither 'valid' nor 'same' with not case sensitive.
        ValueError: If `kernel_size` or `stride` is less than 1.
        ValueError: If length of shape of `x` is not equal to 3.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> max_pool = nn.MaxPool1d(kernel_size=3, stride=1)
        >>> x = Tensor(np.random.randint(0, 10, [1, 2, 4]), mindspore.float32)
        >>> output = max_pool(x)
        >>> result = output.shape
        >>> print(result)
        (1, 2, 2)
    """

    def __init__(self, kernel_size=1, stride=1, pad_mode="valid"):
        """Initialize MaxPool1d."""
        # Validate the int-only arguments before delegating: the base class also accepts
        # tuples, so checking afterwards let tuples through its validation first and made
        # a tuple of the wrong length raise ValueError instead of the documented TypeError.
        _check_1d_pool_args(kernel_size, stride, self.cls_name)
        # The base class validates `pad_mode` and stores it upper-cased in `self.pad_mode`.
        super(MaxPool1d, self).__init__(kernel_size, stride, pad_mode)
        # The input gets an extra axis of size 1 in `construct`, so pool with a (1, k) window.
        self.kernel_size = (1, kernel_size)
        self.stride = (1, stride)
        self.max_pool = P.MaxPool(kernel_size=self.kernel_size,
                                  strides=self.stride,
                                  pad_mode=self.pad_mode)
        self.shape = F.shape
        # Not used by `construct`; kept so the set of instance attributes is unchanged.
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.expand = P.ExpandDims()
        self.squeeze = P.Squeeze(2)

    def construct(self, x):
        """Check that `x` is 3-D, pool it through the 2D primitive and drop the helper axis."""
        _shape_check(self.shape(x), self.cls_name)
        x = self.expand(x, 2)
        output = self.max_pool(x)
        output = self.squeeze(output)
        return output


class AvgPool2d(_PoolNd):
    r"""
    2D average pooling for temporal data.

    Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D input planes.

    Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, AvgPool2d outputs
    regional average in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
    :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.

    .. math::
        \text{output}(N_i, C_j, h, w) = \frac{1}{h_{ker} * w_{ker}} \sum_{m=0}^{h_{ker}-1} \sum_{n=0}^{w_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)

    Note:
        pad_mode for training only supports "same" and "valid".

    Args:
        kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value.
            The data type of kernel_size must be int and the value represents the height and width,
            or a tuple of two int numbers that represent height and width respectively.
            Default: 1.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the height and width of movement are both strides, or a tuple of two int numbers that
            represent height and width of movement respectively. Default: 1.
        pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
            Default: "valid".

            - same: Adopts the way of completion. The height and width of the output will be the same as
              the input. The total number of padding will be calculated in horizontal and vertical
              directions and evenly distributed to top and bottom, left and right if possible.
              Otherwise, the last extra padding will be done from the bottom and the right side.

            - valid: Adopts the way of discarding. The possible largest height and width of output
              will be returned without padding. Extra pixels will be discarded.
        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'.
            Default: 'NCHW'.


    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `kernel_size` or `stride` is neither int nor tuple.
        ValueError: If `pad_mode` is neither 'valid' nor 'same' with not case sensitive.
        ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.
        ValueError: If `kernel_size` or `stride` is less than 1.
        ValueError: If length of shape of `x` is not equal to 4.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> pool = nn.AvgPool2d(kernel_size=3, stride=1)
        >>> x = Tensor(np.random.randint(0, 10, [1, 2, 4, 4]), mindspore.float32)
        >>> output = pool(x)
        >>> print(output.shape)
        (1, 2, 2, 2)
    """

    def __init__(self,
                 kernel_size=1,
                 stride=1,
                 pad_mode="valid",
                 data_format="NCHW"):
        """Initialize AvgPool2d."""
        super(AvgPool2d, self).__init__(kernel_size, stride, pad_mode, data_format)
        self.avg_pool = P.AvgPool(kernel_size=self.kernel_size,
                                  strides=self.stride,
                                  pad_mode=self.pad_mode,
                                  data_format=self.format)

    def construct(self, x):
        """Apply the average pooling primitive to `x`."""
        return self.avg_pool(x)


class AvgPool1d(_PoolNd):
    r"""
    1D average pooling for temporal data.

    Applies a 1D average pooling over an input Tensor which can be regarded as a composition of 1D input planes.

    Typically the input is of shape :math:`(N_{in}, C_{in}, L_{in})`, AvgPool1d outputs
    regional average in the :math:`(L_{in})`-dimension. Given kernel size
    :math:`ks = l_{ker}` and stride :math:`s = s_0`, the operation is as follows.

    .. math::
        \text{output}(N_i, C_j, l) = \frac{1}{l_{ker}} \sum_{n=0}^{l_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times l + n)

    Note:
        pad_mode for training only supports "same" and "valid".

    Args:
        kernel_size (int): The size of kernel window used to take the average value, Default: 1.
        stride (int): The distance of kernel moving, an int number that represents
            the width of movement is strides, Default: 1.
        pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
            Default: "valid".

            - same: Adopts the way of completion. The height and width of the output will be the same as
              the input. The total number of padding will be calculated in horizontal and vertical
              directions and evenly distributed to top and bottom, left and right if possible.
              Otherwise, the last extra padding will be done from the bottom and the right side.

            - valid: Adopts the way of discarding. The possible largest height and width of output
              will be returned without padding. Extra pixels will be discarded.


    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, L_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, L_{out})`.

    Raises:
        TypeError: If `kernel_size` or `stride` is not an int.
        ValueError: If `pad_mode` is neither 'same' nor 'valid' with not case sensitive.
        ValueError: If `kernel_size` or `stride` is less than 1.
        ValueError: If length of shape of `x` is not equal to 3.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> pool = nn.AvgPool1d(kernel_size=6, stride=1)
        >>> x = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
        >>> output = pool(x)
        >>> result = output.shape
        >>> print(result)
        (1, 3, 1)
    """

    def __init__(self,
                 kernel_size=1,
                 stride=1,
                 pad_mode="valid"):
        """Initialize AvgPool1d."""
        # Reject non-int arguments before the base class gets a chance to accept a tuple.
        _check_1d_pool_args(kernel_size, stride, self.cls_name)
        # The base class validates `pad_mode` and stores it upper-cased in `self.pad_mode`.
        super(AvgPool1d, self).__init__(kernel_size, stride, pad_mode)
        # The input gets an extra axis of size 1 in `construct`, so pool with a (1, k) window.
        self.kernel_size = (1, kernel_size)
        self.stride = (1, stride)
        self.avg_pool = P.AvgPool(kernel_size=self.kernel_size,
                                  strides=self.stride,
                                  pad_mode=self.pad_mode)
        self.shape = F.shape
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.slice = P.Slice()
        self.expand = P.ExpandDims()
        self.squeeze = P.Squeeze(2)

    def construct(self, x):
        """Check that `x` is 3-D, then average along the last axis with window `kernel_size`."""
        # Presumably F.depend ties the shape check to `x` so it is kept in the graph -- TODO confirm.
        x = F.depend(x, _shape_check(self.shape(x), self.cls_name))
        batch, channel, width = self.shape(x)
        # NOTE(review): the two shortcut branches below do not consult `self.pad_mode`;
        # confirm they are also the intended result for "same" padding.
        if width == self.kernel_size[1]:
            # The window covers the whole length: a single mean over axis 2.
            x = self.reduce_mean(x, 2)
        elif width - self.kernel_size[1] < self.stride[1]:
            # Only one window position fits: average the first `kernel_size` elements.
            x = self.slice(x, (0, 0, 0), (batch, channel, self.kernel_size[1]))
            x = self.reduce_mean(x, 2)
        else:
            # General case: add a helper axis, pool with the 2D primitive, then drop the axis.
            x = self.expand(x, 2)
            x = self.avg_pool(x)
            x = self.squeeze(x)
        return x