• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_
18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_
19 
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 #include "include/api/status.h"
26 #include "include/dataset/constants.h"
27 #include "include/dataset/transforms.h"
28 
29 namespace mindspore {
30 namespace dataset {
31 
32 // Transform operations for performing computer vision.
33 namespace vision {
34 
35 // Forward declaration: RotateOperation is only named in this header (see Rotate::op_), not defined here.
36 class RotateOperation;
37 
38 /// \brief Apply an affine transform (rotation, translation, scale and shear) on the input image.
39 class Affine final : public TensorTransform {
40  public:
41   /// \brief Constructor.
42   /// \param[in] degrees The degrees to rotate the image.
43   /// \param[in] translation A float vector holding the x axis translation first and the y axis translation second
44   ///     (default = {0.0, 0.0}).
45   /// \param[in] scale The scaling factor for the image (default = 0.0; NOTE(review): 0.0 looks unusual - confirm).
46   /// \param[in] shear A float vector of size 2, representing the shear degrees (default = {0.0, 0.0}).
47   /// \param[in] interpolation An enum for the mode of interpolation (default = InterpolationMode::kNearestNeighbour).
48   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation (Only supports this mode in Lite).
49   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
50   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
51   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
52   /// \param[in] fill_value A vector representing the value to fill the area outside the transformation in the output
53   ///    image (default = {0, 0, 0}). 1 value is used for all RGB channels; 3 values fill R, G, B respectively.
54   /// \note NOTE(review): default is kNearestNeighbour, yet only kLinear is documented as supported in Lite - confirm.
55   explicit Affine(float_t degrees, const std::vector<float> &translation = {0.0, 0.0}, float scale = 0.0,
56                   const std::vector<float> &shear = {0.0, 0.0},
57                   InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
58                   const std::vector<uint8_t> &fill_value = {0, 0, 0});
59 
60   /// \brief Destructor.
61   ~Affine() = default;
62 
63   /// \brief Convert this TensorTransform object into a TensorOperation object.
64   /// \return Shared pointer to TensorOperation object.
65   std::shared_ptr<TensorOperation> Parse() override;
66 
67  private:
68   struct Data;  // Only forward-declared here; the definition is not part of this header.
69   std::shared_ptr<Data> data_;  // Presumably holds the constructor arguments - confirm in the implementation.
70 };
71 
72 /// \brief Crop the input image at the center to the given size.
73 class CenterCrop final : public TensorTransform {
74  public:
75   /// \brief Constructor.
76   /// \param[in] size A vector representing the output size of the cropped image.
77   ///     If the size is a single value, a square crop of size (size, size) is returned.
78   ///     If the size has 2 values, it should be (height, width).
79   explicit CenterCrop(std::vector<int32_t> size);
80 
81   /// \brief Destructor.
82   ~CenterCrop() = default;
83 
84  protected:
85   /// \brief Convert this TensorTransform object into a TensorOperation object.
86   /// \return Shared pointer to TensorOperation object.
87   std::shared_ptr<TensorOperation> Parse() override;
88 
89   std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;  ///< Parse() overload taking a MapTargetDevice.
90 
91  private:
92   struct Data;  // Only forward-declared here; the definition is not part of this header.
93   std::shared_ptr<Data> data_;  // Presumably holds the constructor arguments - confirm in the implementation.
94 };
95 
96 /// \brief Crop an image based on a starting location and a crop size.
97 class Crop final : public TensorTransform {
98  public:
99   /// \brief Constructor.
100   /// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of {x_coor, y_coor}.
101   /// \param[in] size Size of the cropped area.
102   ///     If the size is a single value, a square crop of size (size, size) is returned.
103   ///     If the size has 2 values, it should be (height, width).
104   Crop(std::vector<int32_t> coordinates, std::vector<int32_t> size);
105 
106   /// \brief Destructor.
107   ~Crop() = default;
108 
109  protected:
110   /// \brief Convert this TensorTransform object into a TensorOperation object.
111   /// \return Shared pointer to TensorOperation object.
112   std::shared_ptr<TensorOperation> Parse() override;
113 
114  private:
115   struct Data;  // Only forward-declared here; the definition is not part of this header.
116   std::shared_ptr<Data> data_;  // Presumably holds the constructor arguments - confirm in the implementation.
117 };
118 
119 /// \brief Decode the input image (in RGB mode by default).
120 class Decode final : public TensorTransform {
121  public:
122   /// \brief Constructor.
123   /// \param[in] rgb A boolean indicating whether to decode the image in RGB mode or not (default = true).
124   explicit Decode(bool rgb = true);
125 
126   /// \brief Destructor.
127   ~Decode() = default;
128 
129  protected:
130   /// \brief Convert this TensorTransform object into a TensorOperation object.
131   /// \return Shared pointer to TensorOperation object.
132   std::shared_ptr<TensorOperation> Parse() override;
133 
134   std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;  ///< Parse() overload taking a MapTargetDevice.
135 
136  private:
137   struct Data;  // Only forward-declared here; the definition is not part of this header.
138   std::shared_ptr<Data> data_;  // Presumably holds the constructor arguments - confirm in the implementation.
139 };
140 
141 /// \brief Blur the input image with the specified Gaussian kernel.
142 class GaussianBlur final : public TensorTransform {
143  public:
144   /// \brief Constructor.
145   /// \param[in] kernel_size A vector of Gaussian kernel size for width and height. The value must be positive and odd.
146   /// \param[in] sigma A vector of Gaussian kernel standard deviation sigma for width and height (default = {0., 0.}).
147   ///     The values must be positive, except the default value 0, which means sigma is calculated from the kernel size.
148   GaussianBlur(const std::vector<int32_t> &kernel_size, const std::vector<float> &sigma = {0., 0.});
149 
150   /// \brief Destructor.
151   ~GaussianBlur() = default;
152 
153  protected:
154   /// \brief Convert this TensorTransform object into a TensorOperation object.
155   /// \return Shared pointer to TensorOperation object.
156   std::shared_ptr<TensorOperation> Parse() override;
157 
158  private:
159   struct Data;  // Only forward-declared here; the definition is not part of this header.
160   std::shared_ptr<Data> data_;  // Presumably holds the constructor arguments - confirm in the implementation.
161 };
162 
163 /// \brief Normalize the input image with respect to the given per-channel mean and standard deviation.
164 class Normalize final : public TensorTransform {
165  public:
166   /// \brief Constructor.
167   /// \param[in] mean A vector of mean values for each channel, with respect to channel order.
168   ///     The mean values must be in range [0.0, 255.0].
169   /// \param[in] std A vector of standard deviations for each channel, with respect to channel order.
170   ///     The standard deviation values must be in range (0.0, 255.0].
171   Normalize(std::vector<float> mean, std::vector<float> std);
172 
173   /// \brief Destructor.
174   ~Normalize() = default;
175 
176  protected:
177   /// \brief Convert this TensorTransform object into a TensorOperation object.
178   /// \return Shared pointer to TensorOperation object.
179   std::shared_ptr<TensorOperation> Parse() override;
180 
181   std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;  ///< Parse() overload taking a MapTargetDevice.
182 
183  private:
184   struct Data;  // Only forward-declared here; the definition is not part of this header.
185   std::shared_ptr<Data> data_;  // Presumably holds the constructor arguments - confirm in the implementation.
186 };
187 
188 /// \brief Apply a random affine transformation on the input image in RGB or Greyscale mode.
189 class RandomAffine final : public TensorTransform {
190  public:
191   /// \brief Constructor.
192   /// \param[in] degrees A float vector of size 2, representing the starting and ending degree.
193   /// \param[in] translate_range A float vector of size 2 or 4, representing percentages of translation on x and y axes
194   ///    (default = {0.0, 0.0, 0.0, 0.0}). If the size is 2, (min_dx, max_dx, 0, 0).
195   ///    If the size is 4, (min_dx, max_dx, min_dy, max_dy).
196   ///    All values are in range [-1, 1].
197   /// \param[in] scale_range A float vector of size 2, representing the starting and ending scales (default = {1.0, 1.0}).
198   /// \param[in] shear_ranges A float vector of size 2 or 4, representing the starting and ending shear degrees
199   ///    along the x and y axes (default = {0.0, 0.0, 0.0, 0.0}).
200   ///    If the size is 2, (min_shear_x, max_shear_x, 0, 0).
201   ///    If the size is 4, (min_shear_x, max_shear_x, min_shear_y, max_shear_y).
202   /// \param[in] interpolation An enum for the mode of interpolation (default = InterpolationMode::kNearestNeighbour).
203   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation (Only supports this mode in Lite).
204   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
205   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
206   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
207   /// \param[in] fill_value A vector representing the value to fill the area outside the transform in the output
208   ///    image (default = {0, 0, 0}). 1 value is used for all RGB channels; 3 values fill R, G and B respectively.
209   /// \note NOTE(review): default is kNearestNeighbour, yet only kLinear is documented as supported in Lite - confirm.
210   explicit RandomAffine(const std::vector<float_t> &degrees,
211                         const std::vector<float_t> &translate_range = {0.0, 0.0, 0.0, 0.0},
212                         const std::vector<float_t> &scale_range = {1.0, 1.0},
213                         const std::vector<float_t> &shear_ranges = {0.0, 0.0, 0.0, 0.0},
214                         InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
215                         const std::vector<uint8_t> &fill_value = {0, 0, 0});
216 
217   /// \brief Destructor.
218   ~RandomAffine() = default;
219 
220   /// \brief Convert this TensorTransform object into a TensorOperation object.
221   /// \return Shared pointer to TensorOperation object.
222   std::shared_ptr<TensorOperation> Parse() override;
223 
224  private:
225   struct Data;  // Only forward-declared here; the definition is not part of this header.
226   std::shared_ptr<Data> data_;  // Presumably holds the constructor arguments - confirm in the implementation.
227 };
228 
229 /// \brief Resize the input image to the given size.
230 class Resize final : public TensorTransform {
231  public:
232   /// \brief Constructor.
233   /// \param[in] size A vector representing the output size of the resized image.
234   ///     If the size is a single value, the image will be resized to this value with
235   ///     the same image aspect ratio. If the size has 2 values, it should be (height, width).
236   /// \param[in] interpolation An enum for the mode of interpolation (default = InterpolationMode::kLinear).
237   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation (Only supports this mode in Lite).
238   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
239   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
240   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
241   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
242   explicit Resize(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear);
243 
244   /// \brief Destructor.
245   ~Resize() = default;
246 
247  protected:
248   /// \brief Convert this TensorTransform object into a TensorOperation object.
249   /// \return Shared pointer to TensorOperation object.
250   std::shared_ptr<TensorOperation> Parse() override;
251 
252   std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;  ///< Parse() overload taking a MapTargetDevice.
253 
254  private:
255   struct Data;  // Only forward-declared here; the definition is not part of this header.
256   std::shared_ptr<Data> data_;  // Presumably holds the constructor arguments - confirm in the implementation.
257 };
258 
259 /// \brief Resize the image while keeping the original picture ratio, and fill the rest.
260 class ResizePreserveAR final : public TensorTransform {
261  public:
262   /// \brief Constructor.
263   /// \param[in] height The height of image output value after resizing.
264   /// \param[in] width The width of image output value after resizing.
265   /// \param[in] img_orientation Optional rotation angle (default = 0; NOTE(review): 0 is not listed below - confirm).
266   ///     - img_orientation = 1, Rotate 0 degree.
267   ///     - img_orientation = 2, Rotate 0 degree and apply horizontal flip.
268   ///     - img_orientation = 3, Rotate 180 degree.
269   ///     - img_orientation = 4, Rotate 180 degree and apply horizontal flip.
270   ///     - img_orientation = 5, Rotate 90 degree and apply horizontal flip.
271   ///     - img_orientation = 6, Rotate 90 degree.
272   ///     - img_orientation = 7, Rotate 270 degree and apply horizontal flip.
273   ///     - img_orientation = 8, Rotate 270 degree.
274   ResizePreserveAR(int32_t height, int32_t width, int32_t img_orientation = 0);
275 
276   /// \brief Destructor.
277   ~ResizePreserveAR() = default;
278 
279  protected:
280   /// \brief Convert this TensorTransform object into a TensorOperation object.
281   /// \return Shared pointer to TensorOperation object.
282   std::shared_ptr<TensorOperation> Parse() override;
283 
284  private:
285   struct Data;  // Only forward-declared here; the definition is not part of this header.
286   std::shared_ptr<Data> data_;  // Presumably holds the constructor arguments - confirm in the implementation.
287 };
288 
289 /// \brief Convert the format of the input image from RGB to BGR.
290 /// \note This transform takes no constructor arguments.
291 class RGB2BGR final : public TensorTransform {
292  public:
293   /// \brief Constructor.
294   RGB2BGR() = default;
295 
296   /// \brief Destructor.
297   ~RGB2BGR() = default;
298 
299  protected:
300   /// \brief Convert this TensorTransform object into a TensorOperation object.
301   /// \return Shared pointer to TensorOperation object.
302   std::shared_ptr<TensorOperation> Parse() override;
303 };
304 
305 /// \brief Convert an RGB image or color image to a grayscale one.
306 ///
307 /// \note This transform takes no constructor arguments.
308 class RGB2GRAY final : public TensorTransform {
309  public:
310   /// \brief Constructor.
311   RGB2GRAY() = default;
312 
313   /// \brief Destructor.
314   ~RGB2GRAY() = default;
315 
316  protected:
317   /// \brief Convert this TensorTransform object into a TensorOperation object.
318   /// \return Shared pointer to TensorOperation object.
319   std::shared_ptr<TensorOperation> Parse() override;
320 };
321 
322 /// \brief Rotate the input image, either by a fixed angle id or by an arbitrary number of degrees.
323 class Rotate final : public TensorTransform {
324  public:
325   /// \brief Constructor.
326   /// \note This api is only used in Lite, the interpolation mode is bilinear.
327   /// \param[in] angle_id The fixed rotation angle (default = FixRotationAngle::k0Degree).
328   ///     - FixRotationAngle::k0Degree = 1, Rotate 0 degree.
329   ///     - FixRotationAngle::k0DegreeAndMirror = 2, Rotate 0 degree and apply horizontal flip.
330   ///     - FixRotationAngle::k180Degree = 3, Rotate 180 degree.
331   ///     - FixRotationAngle::k180DegreeAndMirror = 4, Rotate 180 degree and apply horizontal flip.
332   ///     - FixRotationAngle::k90DegreeAndMirror = 5, Rotate 90 degree and apply horizontal flip.
333   ///     - FixRotationAngle::k90Degree = 6, Rotate 90 degree.
334   ///     - FixRotationAngle::k270DegreeAndMirror = 7, Rotate 270 degree and apply horizontal flip.
335   ///     - FixRotationAngle::k270Degree = 8, Rotate 270 degree.
336   explicit Rotate(FixRotationAngle angle_id = FixRotationAngle::k0Degree);
337 
338   /// \brief Constructor.
339   /// \param[in] degrees A float value, representing the rotation degrees.
340   /// \param[in] resample An enum for the mode of interpolation (default = InterpolationMode::kNearestNeighbour).
341   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
342   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
343   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
344   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
345   /// \param[in] expand A boolean representing whether the image is expanded after rotation (default = false).
346   /// \param[in] center A float vector of size 2 representing the x and y center of rotation, or an empty vector
347   ///     for the center of the image (default = {}).
348   /// \param[in] fill_value A vector representing the value to fill the area outside the transform
349   ///    in the output image (default = {0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
350   ///    If 3 values are provided, it is used to fill R, G, B channels respectively.
351   Rotate(float degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false,
352          std::vector<float> center = {}, std::vector<uint8_t> fill_value = {0, 0, 0});
353 
354   /// \brief Destructor.
355   ~Rotate() = default;
356 
357  protected:
358   /// \brief Convert this TensorTransform object into a TensorOperation object.
359   /// \return Shared pointer to TensorOperation object.
360   std::shared_ptr<TensorOperation> Parse() override;
361 
362  private:
363   std::shared_ptr<RotateOperation> op_;  // RotateOperation is only forward-declared in this header.
364   struct Data;  // Only forward-declared here; the definition is not part of this header.
365   std::shared_ptr<Data> data_;  // Presumably holds the constructor arguments - confirm in the implementation.
366 };
367 
368 }  // namespace vision
369 }  // namespace dataset
370 }  // namespace mindspore
371 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_
372