• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * Copyright 2020-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #include "minddata/dataset/kernels/image/random_crop_and_resize_op.h"
17 
18 #include <limits>
19 #include <random>
20 
21 #include "minddata/dataset/kernels/data/data_utils.h"
22 #include "minddata/dataset/kernels/image/image_utils.h"
23 #include "minddata/dataset/util/random.h"
24 #include "minddata/dataset/util/status.h"
25 
26 namespace mindspore {
27 namespace dataset {
RandomCropAndResizeOp(int32_t target_height,int32_t target_width,float scale_lb,float scale_ub,float aspect_lb,float aspect_ub,InterpolationMode interpolation,int32_t max_attempts)28 RandomCropAndResizeOp::RandomCropAndResizeOp(int32_t target_height, int32_t target_width, float scale_lb,
29                                              float scale_ub, float aspect_lb, float aspect_ub,
30                                              InterpolationMode interpolation, int32_t max_attempts)
31     : target_height_(target_height),
32       target_width_(target_width),
33       rnd_scale_(scale_lb, scale_ub),
34       rnd_aspect_(log(aspect_lb), log(aspect_ub)),
35       interpolation_(interpolation),
36       aspect_lb_(aspect_lb),
37       aspect_ub_(aspect_ub),
38       max_iter_(max_attempts) {}
39 
Compute(const TensorRow & input,TensorRow * output)40 Status RandomCropAndResizeOp::Compute(const TensorRow &input, TensorRow *output) {
41   IO_CHECK_VECTOR(input, output);
42 
43   for (size_t i = 0; i < input.size(); i++) {
44     if (input[i]->Rank() < kMinImageRank) {
45       RETURN_STATUS_UNEXPECTED("RandomResizedCrop: input tensor should have at least 2 dimensions, but got: " +
46                                std::to_string(input[i]->Rank()));
47     }
48     if (i < input.size() - 1) {
49       std::vector<dsize_t> size;
50       std::vector<dsize_t> next_size;
51       RETURN_IF_NOT_OK(ImageSize(input[i], &size));
52       RETURN_IF_NOT_OK(ImageSize(input[i + 1], &next_size));
53       if (size[0] != next_size[0] || size[1] != next_size[1]) {
54         RETURN_STATUS_UNEXPECTED(
55           "RandomCropAndResizeOp: Input tensor in different columns of each row must have the same size.");
56       }
57     }
58   }
59   output->resize(input.size());
60   int x = 0;
61   int y = 0;
62   int crop_height = 0;
63   int crop_width = 0;
64   for (size_t i = 0; i < input.size(); i++) {
65     auto input_shape = input[i]->shape();
66     std::vector<dsize_t> size;
67     RETURN_IF_NOT_OK(ImageSize(input[i], &size));
68     int h_in = static_cast<int>(size[0]);
69     int w_in = static_cast<int>(size[1]);
70     if (i == 0) {
71       RETURN_IF_NOT_OK(GetCropBox(h_in, w_in, &x, &y, &crop_height, &crop_width));
72     }
73     if (input[i]->Rank() <= kDefaultImageRank) {
74       RETURN_IF_NOT_OK(CropAndResize(input[i], &(*output)[i], x, y, crop_height, crop_width, target_height_,
75                                      target_width_, interpolation_));
76     } else if (input[i]->Rank() > kDefaultImageRank) {
77       dsize_t num_batch = input[i]->Size() / (input_shape[-3] * input_shape[-2] * input_shape[-1]);
78       TensorShape new_shape({num_batch, input_shape[-3], input_shape[-2], input_shape[-1]});
79       RETURN_IF_NOT_OK(input[i]->Reshape(new_shape));
80       // split [N, H, W, C] to N [H, W, C], and Resize N [H, W, C]
81       std::vector<std::shared_ptr<Tensor>> input_vector_hwc, output_vector_hwc;
82       RETURN_IF_NOT_OK(BatchTensorToTensorVector(input[i], &input_vector_hwc));
83       for (const auto &input_hwc : input_vector_hwc) {
84         std::shared_ptr<Tensor> output_img;
85         RETURN_IF_NOT_OK(CropAndResize(input_hwc, &output_img, x, y, crop_height, crop_width, target_height_,
86                                        target_width_, interpolation_));
87         output_vector_hwc.push_back(output_img);
88       }
89       RETURN_IF_NOT_OK(TensorVectorToBatchTensor(output_vector_hwc, &(*output)[i]));
90       auto output_shape = ComputeOutputShape(input_shape);
91       RETURN_IF_NOT_OK((*output)[i]->Reshape(output_shape));
92     }
93   }
94   return Status::OK();
95 }
96 
// Returns a copy of `input` whose third-from-last and second-from-last dimensions are replaced by
// target_height_ and target_width_; all other dimensions are kept.
//
// A shape with fewer than three dimensions has no such positions. It is returned unchanged instead
// of indexing outside the vector.
TensorShape RandomCropAndResizeOp::ComputeOutputShape(const TensorShape &input) const {
  // Distances of the height / width axes from the end of the shape.
  constexpr size_t kHeightOffsetFromEnd = 3;
  constexpr size_t kWidthOffsetFromEnd = 2;

  auto out_shape_vec = input.AsVector();
  const size_t rank = out_shape_vec.size();
  if (rank < kHeightOffsetFromEnd) {
    return TensorShape(out_shape_vec);
  }
  out_shape_vec[rank - kHeightOffsetFromEnd] = target_height_;
  out_shape_vec[rank - kWidthOffsetFromEnd] = target_width_;
  return TensorShape(out_shape_vec);
}
107 
OutputShape(const std::vector<TensorShape> & inputs,std::vector<TensorShape> & outputs)108 Status RandomCropAndResizeOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
109   RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
110   outputs.clear();
111   TensorShape out = TensorShape{target_height_, target_width_};
112   if (inputs[0].Rank() == 2) {
113     (void)outputs.emplace_back(out);
114   }
115   if (inputs[0].Rank() == 3) {
116     (void)outputs.emplace_back(out.AppendDim(inputs[0][2]));
117   } else if (inputs[0].Rank() > kDefaultImageRank) {
118     auto out_shape = ComputeOutputShape(inputs[0]);
119     (void)outputs.emplace_back(out_shape);
120   }
121   CHECK_FAIL_RETURN_UNEXPECTED(!outputs.empty(),
122                                "RandomCropAndResize: input tensor should have at least 2 dimensions, but got: " +
123                                  std::to_string(inputs[0].Rank()));
124   return Status::OK();
125 }
126 
GetCropBox(int h_in,int w_in,int * x,int * y,int * crop_height,int * crop_width)127 Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width) {
128   CHECK_FAIL_RETURN_UNEXPECTED(crop_height != nullptr, "crop_height is nullptr.");
129   CHECK_FAIL_RETURN_UNEXPECTED(crop_width != nullptr, "crop_width is nullptr.");
130   *crop_width = w_in;
131   *crop_height = h_in;
132   CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "RandomCropAndResize: Width of input cannot be 0.");
133   CHECK_FAIL_RETURN_UNEXPECTED(h_in != 0, "RandomCropAndResize: Height of input cannot be 0.");
134   CHECK_FAIL_RETURN_UNEXPECTED(
135     aspect_lb_ > 0,
136     "RandomCropAndResize: 'ratio'(aspect) lower bound must be greater than 0, but got:" + std::to_string(aspect_lb_));
137   for (int32_t i = 0; i < max_iter_; i++) {
138     double const sample_scale = rnd_scale_(random_generator_);
139     // In case of non-symmetrical aspect ratios, use uniform distribution on a logarithmic sample_scale.
140     // Note rnd_aspect_ is already a random distribution of the input aspect ratio in logarithmic sample_scale.
141     double const sample_aspect = exp(rnd_aspect_(random_generator_));
142 
143     CHECK_FAIL_RETURN_UNEXPECTED(
144       (std::numeric_limits<int32_t>::max() / h_in) > w_in,
145       "RandomCropAndResizeOp: multiplication out of bounds, check image width and image height first.");
146     CHECK_FAIL_RETURN_UNEXPECTED(
147       static_cast<double>((std::numeric_limits<int32_t>::max() / h_in) / w_in) > sample_scale,
148       "RandomCropAndResizeOp: multiplication out of bounds, check image width, image height and sample scale first.");
149     CHECK_FAIL_RETURN_UNEXPECTED(
150       static_cast<double>((std::numeric_limits<int32_t>::max() / h_in) / w_in) / sample_scale > sample_aspect,
151       "RandomCropAndResizeOp: multiplication out of bounds, check image width, image "
152       "height, sample scale and sample aspect first.");
153     *crop_width = static_cast<int32_t>(std::round(std::sqrt(h_in * w_in * sample_scale * sample_aspect)));
154     *crop_height = static_cast<int32_t>(std::round(*crop_width / sample_aspect));
155 
156     // forbidden crop_width or crop_height is zero
157     if (*crop_width <= 0) {
158       *crop_width = 1;
159     }
160     if (*crop_height <= 0) {
161       *crop_height = 1;
162     }
163 
164     if (*crop_width <= w_in && *crop_height <= h_in) {
165       std::uniform_int_distribution<> rd_x(0, w_in - *crop_width);
166       std::uniform_int_distribution<> rd_y(0, h_in - *crop_height);
167       *x = rd_x(random_generator_);
168       *y = rd_y(random_generator_);
169       return Status::OK();
170     }
171   }
172   double const img_aspect = static_cast<double>(w_in) / h_in;
173   if (img_aspect < aspect_lb_) {
174     *crop_width = w_in;
175     *crop_height = static_cast<int32_t>(std::round(*crop_width / static_cast<double>(aspect_lb_)));
176   } else {
177     if (img_aspect > aspect_ub_) {
178       *crop_height = h_in;
179       *crop_width = static_cast<int32_t>(std::round(*crop_height * static_cast<double>(aspect_ub_)));
180     } else {
181       *crop_width = w_in;
182       *crop_height = h_in;
183     }
184   }
185   constexpr float crop_ratio = 2.0;
186   // forbidden crop_width or crop_height is zero
187   if (*crop_width <= 0) {
188     *crop_width = 1;
189   }
190   if (*crop_height <= 0) {
191     *crop_height = 1;
192   }
193 
194   *x = static_cast<int32_t>(std::round(static_cast<float>(w_in - *crop_width) / crop_ratio));
195   *y = static_cast<int32_t>(std::round(static_cast<float>(h_in - *crop_height) / crop_ratio));
196   return Status::OK();
197 }
198 }  // namespace dataset
199 }  // namespace mindspore
200