1 /**
2 * Copyright 2020-2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "minddata/dataset/kernels/image/random_crop_and_resize_op.h"
17
18 #include <limits>
19 #include <random>
20
21 #include "minddata/dataset/kernels/data/data_utils.h"
22 #include "minddata/dataset/kernels/image/image_utils.h"
23 #include "minddata/dataset/util/random.h"
24 #include "minddata/dataset/util/status.h"
25
26 namespace mindspore {
27 namespace dataset {
RandomCropAndResizeOp(int32_t target_height,int32_t target_width,float scale_lb,float scale_ub,float aspect_lb,float aspect_ub,InterpolationMode interpolation,int32_t max_attempts)28 RandomCropAndResizeOp::RandomCropAndResizeOp(int32_t target_height, int32_t target_width, float scale_lb,
29 float scale_ub, float aspect_lb, float aspect_ub,
30 InterpolationMode interpolation, int32_t max_attempts)
31 : target_height_(target_height),
32 target_width_(target_width),
33 rnd_scale_(scale_lb, scale_ub),
34 rnd_aspect_(log(aspect_lb), log(aspect_ub)),
35 interpolation_(interpolation),
36 aspect_lb_(aspect_lb),
37 aspect_ub_(aspect_ub),
38 max_iter_(max_attempts) {}
39
Compute(const TensorRow & input,TensorRow * output)40 Status RandomCropAndResizeOp::Compute(const TensorRow &input, TensorRow *output) {
41 IO_CHECK_VECTOR(input, output);
42
43 for (size_t i = 0; i < input.size(); i++) {
44 if (input[i]->Rank() < kMinImageRank) {
45 RETURN_STATUS_UNEXPECTED("RandomResizedCrop: input tensor should have at least 2 dimensions, but got: " +
46 std::to_string(input[i]->Rank()));
47 }
48 if (i < input.size() - 1) {
49 std::vector<dsize_t> size;
50 std::vector<dsize_t> next_size;
51 RETURN_IF_NOT_OK(ImageSize(input[i], &size));
52 RETURN_IF_NOT_OK(ImageSize(input[i + 1], &next_size));
53 if (size[0] != next_size[0] || size[1] != next_size[1]) {
54 RETURN_STATUS_UNEXPECTED(
55 "RandomCropAndResizeOp: Input tensor in different columns of each row must have the same size.");
56 }
57 }
58 }
59 output->resize(input.size());
60 int x = 0;
61 int y = 0;
62 int crop_height = 0;
63 int crop_width = 0;
64 for (size_t i = 0; i < input.size(); i++) {
65 auto input_shape = input[i]->shape();
66 std::vector<dsize_t> size;
67 RETURN_IF_NOT_OK(ImageSize(input[i], &size));
68 int h_in = static_cast<int>(size[0]);
69 int w_in = static_cast<int>(size[1]);
70 if (i == 0) {
71 RETURN_IF_NOT_OK(GetCropBox(h_in, w_in, &x, &y, &crop_height, &crop_width));
72 }
73 if (input[i]->Rank() <= kDefaultImageRank) {
74 RETURN_IF_NOT_OK(CropAndResize(input[i], &(*output)[i], x, y, crop_height, crop_width, target_height_,
75 target_width_, interpolation_));
76 } else if (input[i]->Rank() > kDefaultImageRank) {
77 dsize_t num_batch = input[i]->Size() / (input_shape[-3] * input_shape[-2] * input_shape[-1]);
78 TensorShape new_shape({num_batch, input_shape[-3], input_shape[-2], input_shape[-1]});
79 RETURN_IF_NOT_OK(input[i]->Reshape(new_shape));
80 // split [N, H, W, C] to N [H, W, C], and Resize N [H, W, C]
81 std::vector<std::shared_ptr<Tensor>> input_vector_hwc, output_vector_hwc;
82 RETURN_IF_NOT_OK(BatchTensorToTensorVector(input[i], &input_vector_hwc));
83 for (const auto &input_hwc : input_vector_hwc) {
84 std::shared_ptr<Tensor> output_img;
85 RETURN_IF_NOT_OK(CropAndResize(input_hwc, &output_img, x, y, crop_height, crop_width, target_height_,
86 target_width_, interpolation_));
87 output_vector_hwc.push_back(output_img);
88 }
89 RETURN_IF_NOT_OK(TensorVectorToBatchTensor(output_vector_hwc, &(*output)[i]));
90 auto output_shape = ComputeOutputShape(input_shape);
91 RETURN_IF_NOT_OK((*output)[i]->Reshape(output_shape));
92 }
93 }
94 return Status::OK();
95 }
96
/// \brief Produce the output shape of a multi-dimensional image tensor after crop-and-resize.
/// \param[in] input Shape of the input tensor.
/// \return A copy of `input` in which the third-from-last dimension is replaced by target_height_ and the
///     second-from-last dimension by target_width_; every other dimension is kept.
/// \note The two dimensions are addressed as (rank - 3) and (rank - 2), so `input` must have at least 3 dimensions.
TensorShape RandomCropAndResizeOp::ComputeOutputShape(const TensorShape &input) const {
  // Distances of the height and width dimensions from the end of the shape.
  constexpr int32_t kHeightFromEnd = 3;
  constexpr int32_t kWidthFromEnd = 2;

  auto dims = input.AsVector();
  const auto rank = dims.size();
  dims[rank - kHeightFromEnd] = target_height_;
  dims[rank - kWidthFromEnd] = target_width_;
  return TensorShape(dims);
}
107
OutputShape(const std::vector<TensorShape> & inputs,std::vector<TensorShape> & outputs)108 Status RandomCropAndResizeOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
109 RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
110 outputs.clear();
111 TensorShape out = TensorShape{target_height_, target_width_};
112 if (inputs[0].Rank() == 2) {
113 (void)outputs.emplace_back(out);
114 }
115 if (inputs[0].Rank() == 3) {
116 (void)outputs.emplace_back(out.AppendDim(inputs[0][2]));
117 } else if (inputs[0].Rank() > kDefaultImageRank) {
118 auto out_shape = ComputeOutputShape(inputs[0]);
119 (void)outputs.emplace_back(out_shape);
120 }
121 CHECK_FAIL_RETURN_UNEXPECTED(!outputs.empty(),
122 "RandomCropAndResize: input tensor should have at least 2 dimensions, but got: " +
123 std::to_string(inputs[0].Rank()));
124 return Status::OK();
125 }
126
GetCropBox(int h_in,int w_in,int * x,int * y,int * crop_height,int * crop_width)127 Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width) {
128 CHECK_FAIL_RETURN_UNEXPECTED(crop_height != nullptr, "crop_height is nullptr.");
129 CHECK_FAIL_RETURN_UNEXPECTED(crop_width != nullptr, "crop_width is nullptr.");
130 *crop_width = w_in;
131 *crop_height = h_in;
132 CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "RandomCropAndResize: Width of input cannot be 0.");
133 CHECK_FAIL_RETURN_UNEXPECTED(h_in != 0, "RandomCropAndResize: Height of input cannot be 0.");
134 CHECK_FAIL_RETURN_UNEXPECTED(
135 aspect_lb_ > 0,
136 "RandomCropAndResize: 'ratio'(aspect) lower bound must be greater than 0, but got:" + std::to_string(aspect_lb_));
137 for (int32_t i = 0; i < max_iter_; i++) {
138 double const sample_scale = rnd_scale_(random_generator_);
139 // In case of non-symmetrical aspect ratios, use uniform distribution on a logarithmic sample_scale.
140 // Note rnd_aspect_ is already a random distribution of the input aspect ratio in logarithmic sample_scale.
141 double const sample_aspect = exp(rnd_aspect_(random_generator_));
142
143 CHECK_FAIL_RETURN_UNEXPECTED(
144 (std::numeric_limits<int32_t>::max() / h_in) > w_in,
145 "RandomCropAndResizeOp: multiplication out of bounds, check image width and image height first.");
146 CHECK_FAIL_RETURN_UNEXPECTED(
147 static_cast<double>((std::numeric_limits<int32_t>::max() / h_in) / w_in) > sample_scale,
148 "RandomCropAndResizeOp: multiplication out of bounds, check image width, image height and sample scale first.");
149 CHECK_FAIL_RETURN_UNEXPECTED(
150 static_cast<double>((std::numeric_limits<int32_t>::max() / h_in) / w_in) / sample_scale > sample_aspect,
151 "RandomCropAndResizeOp: multiplication out of bounds, check image width, image "
152 "height, sample scale and sample aspect first.");
153 *crop_width = static_cast<int32_t>(std::round(std::sqrt(h_in * w_in * sample_scale * sample_aspect)));
154 *crop_height = static_cast<int32_t>(std::round(*crop_width / sample_aspect));
155
156 // forbidden crop_width or crop_height is zero
157 if (*crop_width <= 0) {
158 *crop_width = 1;
159 }
160 if (*crop_height <= 0) {
161 *crop_height = 1;
162 }
163
164 if (*crop_width <= w_in && *crop_height <= h_in) {
165 std::uniform_int_distribution<> rd_x(0, w_in - *crop_width);
166 std::uniform_int_distribution<> rd_y(0, h_in - *crop_height);
167 *x = rd_x(random_generator_);
168 *y = rd_y(random_generator_);
169 return Status::OK();
170 }
171 }
172 double const img_aspect = static_cast<double>(w_in) / h_in;
173 if (img_aspect < aspect_lb_) {
174 *crop_width = w_in;
175 *crop_height = static_cast<int32_t>(std::round(*crop_width / static_cast<double>(aspect_lb_)));
176 } else {
177 if (img_aspect > aspect_ub_) {
178 *crop_height = h_in;
179 *crop_width = static_cast<int32_t>(std::round(*crop_height * static_cast<double>(aspect_ub_)));
180 } else {
181 *crop_width = w_in;
182 *crop_height = h_in;
183 }
184 }
185 constexpr float crop_ratio = 2.0;
186 // forbidden crop_width or crop_height is zero
187 if (*crop_width <= 0) {
188 *crop_width = 1;
189 }
190 if (*crop_height <= 0) {
191 *crop_height = 1;
192 }
193
194 *x = static_cast<int32_t>(std::round(static_cast<float>(w_in - *crop_width) / crop_ratio));
195 *y = static_cast<int32_t>(std::round(static_cast<float>(h_in - *crop_height) / crop_ratio));
196 return Status::OK();
197 }
198 } // namespace dataset
199 } // namespace mindspore
200