# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""pooling"""
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore._checkparam import Rel, Validator as validator
from mindspore.ops.primitive import constexpr
import mindspore.context as context
from ..cell import Cell

__all__ = ['AvgPool2d', 'MaxPool2d', 'AvgPool1d', 'MaxPool1d']


class _PoolNd(Cell):
    """Base class for N-D pooling layers."""

    def __init__(self, kernel_size, stride, pad_mode, data_format="NCHW"):
        """Initialize _PoolNd."""
        super(_PoolNd, self).__init__()
        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.cls_name)
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError(f"For '{self.cls_name}', the 'NHWC' format is only supported on the GPU target, "
                             f"but got device target {context.get_context('device_target')}.")

        def _check_int_or_tuple(arg_name, arg_value):
            validator.check_value_type(arg_name, arg_value, [int, tuple], self.cls_name)
            error_msg = f"For '{self.cls_name}', the '{arg_name}' should be a positive int or " \
                        f"a tuple of two positive ints, but got {arg_value}"
            if isinstance(arg_value, int):
                if arg_value <= 0:
                    raise ValueError(error_msg)
            elif len(arg_value) == 2:
                for item in arg_value:
                    if isinstance(item, int) and item > 0:
                        continue
                    raise ValueError(error_msg)
            else:
                raise ValueError(error_msg)
            return arg_value

        self.kernel_size = _check_int_or_tuple('kernel_size', kernel_size)
        self.stride = _check_int_or_tuple('stride', stride)

    def construct(self, *inputs):
        pass

    def extend_repr(self):
        return 'kernel_size={kernel_size}, stride={stride}, pad_mode={pad_mode}'.format(**self.__dict__)


@constexpr
def _shape_check(in_shape, prim_name=None):
    msg_prefix = f"For '{prim_name}', the" if prim_name else "The"
    if len(in_shape) != 3:
        raise ValueError(f"{msg_prefix} input must have 3 dimensions, but got {len(in_shape)}.")


class MaxPool2d(_PoolNd):
    r"""
    2D max pooling operation for spatial data.

    Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes.

    Typically, the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, and MaxPool2d outputs
    regional maxima over the :math:`(H_{in}, W_{in})` dimensions. Given kernel size
    :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.

    .. math::
        \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)

    Note:
        pad_mode for training only supports "same" and "valid".

    Args:
        kernel_size (Union[int, tuple[int]]): The size of the window used to take the max value.
            An int sets both the height and the width of the window; a tuple of two ints sets
            the height and the width respectively. Default: 1.
        stride (Union[int, tuple[int]]): The stride of the window. An int sets both the height
            and the width of the movement; a tuple of two ints sets them respectively. Default: 1.
        pad_mode (str): The padding mode, either "same" or "valid" (case insensitive).
            Default: "valid".

            - same: Pads the input so that the output has the same height and width as the input.
              The total padding needed in each direction is split as evenly as possible between
              top and bottom and between left and right; any extra padding is added to the bottom
              and the right side.

            - valid: No padding. The output has the largest height and width that fit complete
              pooling windows; extra pixels are discarded.
        data_format (str): The data format, either 'NHWC' or 'NCHW'. Default: 'NCHW'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `kernel_size` or `stride` is neither an int nor a tuple.
        ValueError: If `pad_mode` is neither 'valid' nor 'same' (case insensitive).
        ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.
        ValueError: If `kernel_size` or `stride` is less than 1.
        ValueError: If the length of the shape of `x` is not equal to 4.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> pool = nn.MaxPool2d(kernel_size=3, stride=1)
        >>> x = Tensor(np.random.randint(0, 10, [1, 2, 4, 4]), mindspore.float32)
        >>> output = pool(x)
        >>> print(output.shape)
        (1, 2, 2, 2)
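        >>> # An additional, illustrative check (reusing the same x): with pad_mode="same" and
        >>> # stride=1, the spatial size of the input is preserved.
        >>> pool_same = nn.MaxPool2d(kernel_size=3, stride=1, pad_mode="same")
        >>> print(pool_same(x).shape)
        (1, 2, 4, 4)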
    """

    def __init__(self, kernel_size=1, stride=1, pad_mode="valid", data_format="NCHW"):
        """Initialize MaxPool2d."""
        super(MaxPool2d, self).__init__(kernel_size, stride, pad_mode, data_format)
        self.max_pool = P.MaxPool(kernel_size=self.kernel_size,
                                  strides=self.stride,
                                  pad_mode=self.pad_mode,
                                  data_format=self.format)

    def construct(self, x):
        out = self.max_pool(x)
        return out


class MaxPool1d(_PoolNd):
    r"""
    1D max pooling operation for temporal data.

    Applies a 1D max pooling over an input Tensor which can be regarded as a composition of 1D planes.

    Typically, the input is of shape :math:`(N_{in}, C_{in}, L_{in})`, and MaxPool1d outputs
    regional maxima over the :math:`L_{in}` dimension. Given kernel size
    :math:`ks = l_{ker}` and stride :math:`s = s_0`, the operation is as follows.

    .. math::
        \text{output}(N_i, C_j, l) = \max_{n=0, \ldots, l_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times l + n)

    Note:
        pad_mode for training only supports "same" and "valid".

    Args:
        kernel_size (int): The size of the window used to take the max value. Default: 1.
        stride (int): The stride of the window, i.e. the distance the window moves at each step.
            Default: 1.
        pad_mode (str): The padding mode, either "same" or "valid" (case insensitive).
            Default: "valid".

            - same: Pads the input so that the output has the same length as the input. The total
              padding is split as evenly as possible between the two ends of the input; any extra
              padding is added at the end.

            - valid: No padding. The output has the largest length that fits complete pooling
              windows; extra elements are discarded.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C, L_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C, L_{out})`.

    Raises:
        TypeError: If `kernel_size` or `stride` is not an int.
        ValueError: If `pad_mode` is neither 'valid' nor 'same' (case insensitive).
        ValueError: If `kernel_size` or `stride` is less than 1.
        ValueError: If the length of the shape of `x` is not equal to 3.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> max_pool = nn.MaxPool1d(kernel_size=3, stride=1)
        >>> x = Tensor(np.random.randint(0, 10, [1, 2, 4]), mindspore.float32)
        >>> output = max_pool(x)
        >>> result = output.shape
        >>> print(result)
        (1, 2, 2)
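        >>> # A further illustrative case: with pad_mode="valid" the output length follows
        >>> # floor((L_in - kernel_size) / stride) + 1, so kernel_size=2 with stride=2 halves the length.
        >>> max_pool2 = nn.MaxPool1d(kernel_size=2, stride=2)
        >>> print(max_pool2(x).shape)
        (1, 2, 2)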
    """

    def __init__(self, kernel_size=1, stride=1, pad_mode="valid"):
        """Initialize MaxPool1d."""
        super(MaxPool1d, self).__init__(kernel_size, stride, pad_mode)
        validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name)
        validator.check_value_type('stride', stride, [int], self.cls_name)
        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.cls_name)
        validator.check_int(kernel_size, 1, Rel.GE, "kernel_size", self.cls_name)
        validator.check_int(stride, 1, Rel.GE, "stride", self.cls_name)
        self.kernel_size = (1, kernel_size)
        self.stride = (1, stride)
        self.max_pool = P.MaxPool(kernel_size=self.kernel_size,
                                  strides=self.stride,
                                  pad_mode=self.pad_mode)
        self.shape = F.shape
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.expand = P.ExpandDims()
        self.squeeze = P.Squeeze(2)

    def construct(self, x):
        _shape_check(self.shape(x), self.cls_name)
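        # The 3-D input (N, C, L) is expanded to (N, C, 1, L) so the 2-D MaxPool primitive with
        # kernel (1, kernel_size) can be reused; the extra axis is squeezed out again afterwards.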
        x = self.expand(x, 2)
        output = self.max_pool(x)
        output = self.squeeze(output)
        return output


class AvgPool2d(_PoolNd):
    r"""
    2D average pooling for spatial data.

    Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D input planes.

    Typically, the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, and AvgPool2d outputs
    regional averages over the :math:`(H_{in}, W_{in})` dimensions. Given kernel size
    :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.

    .. math::
        \text{output}(N_i, C_j, h, w) = \frac{1}{h_{ker} * w_{ker}} \sum_{m=0}^{h_{ker}-1} \sum_{n=0}^{w_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)

    Note:
        pad_mode for training only supports "same" and "valid".

    Args:
        kernel_size (Union[int, tuple[int]]): The size of the window used to take the average value.
            An int sets both the height and the width of the window; a tuple of two ints sets
            the height and the width respectively. Default: 1.
        stride (Union[int, tuple[int]]): The stride of the window. An int sets both the height
            and the width of the movement; a tuple of two ints sets them respectively. Default: 1.
        pad_mode (str): The padding mode, either "same" or "valid" (case insensitive).
            Default: "valid".

            - same: Pads the input so that the output has the same height and width as the input.
              The total padding needed in each direction is split as evenly as possible between
              top and bottom and between left and right; any extra padding is added to the bottom
              and the right side.

            - valid: No padding. The output has the largest height and width that fit complete
              pooling windows; extra pixels are discarded.
        data_format (str): The data format, either 'NHWC' or 'NCHW'. Default: 'NCHW'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `kernel_size` or `stride` is neither an int nor a tuple.
        ValueError: If `pad_mode` is neither 'valid' nor 'same' (case insensitive).
        ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.
        ValueError: If `kernel_size` or `stride` is less than 1.
        ValueError: If the length of the shape of `x` is not equal to 4.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> pool = nn.AvgPool2d(kernel_size=3, stride=1)
        >>> x = Tensor(np.random.randint(0, 10, [1, 2, 4, 4]), mindspore.float32)
        >>> output = pool(x)
        >>> print(output.shape)
        (1, 2, 2, 2)
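        >>> # An extra, illustrative example with tuple arguments: with pad_mode="valid", each
        >>> # output dimension follows floor((input_size - kernel_size) / stride) + 1.
        >>> pool2 = nn.AvgPool2d(kernel_size=(2, 3), stride=(1, 2))
        >>> print(pool2(x).shape)
        (1, 2, 3, 1)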
    """

    def __init__(self,
                 kernel_size=1,
                 stride=1,
                 pad_mode="valid",
                 data_format="NCHW"):
        """Initialize AvgPool2d."""
        super(AvgPool2d, self).__init__(kernel_size, stride, pad_mode, data_format)
        self.avg_pool = P.AvgPool(kernel_size=self.kernel_size,
                                  strides=self.stride,
                                  pad_mode=self.pad_mode,
                                  data_format=self.format)

    def construct(self, x):
        return self.avg_pool(x)


class AvgPool1d(_PoolNd):
    r"""
    1D average pooling for temporal data.

    Applies a 1D average pooling over an input Tensor which can be regarded as a composition of 1D input planes.

    Typically, the input is of shape :math:`(N_{in}, C_{in}, L_{in})`, and AvgPool1d outputs
    regional averages over the :math:`L_{in}` dimension. Given kernel size
    :math:`ks = l_{ker}` and stride :math:`s = s_0`, the operation is as follows.

    .. math::
        \text{output}(N_i, C_j, l) = \frac{1}{l_{ker}} \sum_{n=0}^{l_{ker}-1}
        \text{input}(N_i, C_j, s_0 \times l + n)

    Note:
        pad_mode for training only supports "same" and "valid".

    Args:
        kernel_size (int): The size of the window used to take the average value. Default: 1.
        stride (int): The stride of the window, i.e. the distance the window moves at each step.
            Default: 1.
        pad_mode (str): The padding mode, either "same" or "valid" (case insensitive).
            Default: "valid".

            - same: Pads the input so that the output has the same length as the input. The total
              padding is split as evenly as possible between the two ends of the input; any extra
              padding is added at the end.

            - valid: No padding. The output has the largest length that fits complete pooling
              windows; extra elements are discarded.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, L_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, L_{out})`.

    Raises:
        TypeError: If `kernel_size` or `stride` is not an int.
        ValueError: If `pad_mode` is neither 'same' nor 'valid' (case insensitive).
        ValueError: If `kernel_size` or `stride` is less than 1.
        ValueError: If the length of the shape of `x` is not equal to 3.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> pool = nn.AvgPool1d(kernel_size=6, stride=1)
        >>> x = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
        >>> output = pool(x)
        >>> result = output.shape
        >>> print(result)
        (1, 3, 1)
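        >>> # Another illustrative case: when the window is smaller than the sequence length, the
        >>> # output length follows floor((L_in - kernel_size) / stride) + 1.
        >>> pool2 = nn.AvgPool1d(kernel_size=3, stride=1)
        >>> print(pool2(x).shape)
        (1, 3, 4)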
    """

    def __init__(self,
                 kernel_size=1,
                 stride=1,
                 pad_mode="valid"):
        """Initialize AvgPool1d."""
        validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name)
        validator.check_value_type('stride', stride, [int], self.cls_name)
        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.cls_name)
        validator.check_int(kernel_size, 1, Rel.GE, "kernel_size", self.cls_name)
        validator.check_int(stride, 1, Rel.GE, "stride", self.cls_name)
        super(AvgPool1d, self).__init__(kernel_size, stride, pad_mode)
        self.kernel_size = (1, kernel_size)
        self.stride = (1, stride)
        self.avg_pool = P.AvgPool(kernel_size=self.kernel_size,
                                  strides=self.stride,
                                  pad_mode=self.pad_mode)
        self.shape = F.shape
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.slice = P.Slice()
        self.expand = P.ExpandDims()
        self.squeeze = P.Squeeze(2)

    def construct(self, x):
        x = F.depend(x, _shape_check(self.shape(x), self.cls_name))
        batch, channel, width = self.shape(x)
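        # Three cases, depending on how the pooling window relates to the sequence length:
        #   - the window covers the whole sequence: a plain mean over the length axis is enough;
        #   - at most one window position fits (width - kernel_size < stride): slice the first
        #     kernel_size elements and average them;
        #   - otherwise: expand to (N, C, 1, L), reuse the 2-D AvgPool primitive, then squeeze.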
        if width == self.kernel_size[1]:
            x = self.reduce_mean(x, 2)
        elif width - self.kernel_size[1] < self.stride[1]:
            x = self.slice(x, (0, 0, 0), (batch, channel, self.kernel_size[1]))
            x = self.reduce_mean(x, 2)
        else:
            x = self.expand(x, 2)
            x = self.avg_pool(x)
            x = self.squeeze(x)
        return x