1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "minddata/dataset/kernels/image/random_crop_and_resize_op.h"
17 #include <limits>
18 #include <random>
19
20 #include "minddata/dataset/kernels/image/image_utils.h"
21 #include "minddata/dataset/util/random.h"
22 #include "minddata/dataset/util/status.h"
23
24 namespace mindspore {
25 namespace dataset {
26 const float RandomCropAndResizeOp::kDefScaleLb = 0.08;
27 const float RandomCropAndResizeOp::kDefScaleUb = 1.0;
28 const float RandomCropAndResizeOp::kDefAspectLb = 0.75;
29 const float RandomCropAndResizeOp::kDefAspectUb = 1.333333;
30 const InterpolationMode RandomCropAndResizeOp::kDefInterpolation = InterpolationMode::kLinear;
31 const int32_t RandomCropAndResizeOp::kDefMaxIter = 10;
32
RandomCropAndResizeOp(int32_t target_height,int32_t target_width,float scale_lb,float scale_ub,float aspect_lb,float aspect_ub,InterpolationMode interpolation,int32_t max_attempts)33 RandomCropAndResizeOp::RandomCropAndResizeOp(int32_t target_height, int32_t target_width, float scale_lb,
34 float scale_ub, float aspect_lb, float aspect_ub,
35 InterpolationMode interpolation, int32_t max_attempts)
36 : target_height_(target_height),
37 target_width_(target_width),
38 rnd_scale_(scale_lb, scale_ub),
39 rnd_aspect_(log(aspect_lb), log(aspect_ub)),
40 interpolation_(interpolation),
41 aspect_lb_(aspect_lb),
42 aspect_ub_(aspect_ub),
43 max_iter_(max_attempts) {
44 rnd_.seed(GetSeed());
45 is_deterministic_ = false;
46 }
47
Compute(const TensorRow & input,TensorRow * output)48 Status RandomCropAndResizeOp::Compute(const TensorRow &input, TensorRow *output) {
49 IO_CHECK_VECTOR(input, output);
50 if (input.size() != 1) {
51 for (size_t i = 0; i < input.size() - 1; i++) {
52 if (input[i]->Rank() != 2 && input[i]->Rank() != 3) {
53 std::string err_msg = "RandomCropAndResizeOp: image shape is not <H,W,C> or <H, W>, but got rank:" +
54 std::to_string(input[i]->Rank());
55 RETURN_STATUS_UNEXPECTED(err_msg);
56 }
57 if (input[i]->shape()[0] != input[i + 1]->shape()[0] || input[i]->shape()[1] != input[i + 1]->shape()[1]) {
58 RETURN_STATUS_UNEXPECTED("RandomCropAndResizeOp: Input images must have the same size.");
59 }
60 }
61 }
62 const int output_count = input.size();
63 output->resize(output_count);
64 int x = 0;
65 int y = 0;
66 int crop_height = 0;
67 int crop_width = 0;
68 for (size_t i = 0; i < input.size(); i++) {
69 RETURN_IF_NOT_OK(ValidateImageRank("RandomCropAndResize", input[i]->shape().Size()));
70 int h_in = input[i]->shape()[0];
71 int w_in = input[i]->shape()[1];
72 if (i == 0) {
73 RETURN_IF_NOT_OK(GetCropBox(h_in, w_in, &x, &y, &crop_height, &crop_width));
74 }
75 RETURN_IF_NOT_OK(CropAndResize(input[i], &(*output)[i], x, y, crop_height, crop_width, target_height_,
76 target_width_, interpolation_));
77 }
78 return Status::OK();
79 }
80
OutputShape(const std::vector<TensorShape> & inputs,std::vector<TensorShape> & outputs)81 Status RandomCropAndResizeOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
82 RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
83 outputs.clear();
84 TensorShape out = TensorShape{target_height_, target_width_};
85 if (inputs[0].Rank() == 2) {
86 (void)outputs.emplace_back(out);
87 }
88 if (inputs[0].Rank() == 3) {
89 (void)outputs.emplace_back(out.AppendDim(inputs[0][2]));
90 }
91 if (!outputs.empty()) {
92 return Status::OK();
93 }
94 return Status(StatusCode::kMDUnexpectedError,
95 "RandomCropAndResize: invalid input shape, expected 2D or 3D input, but got input dimension is: " +
96 std::to_string(inputs[0].Rank()));
97 }
GetCropBox(int h_in,int w_in,int * x,int * y,int * crop_height,int * crop_width)98 Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width) {
99 CHECK_FAIL_RETURN_UNEXPECTED(crop_height != nullptr, "crop_height is nullptr.");
100 CHECK_FAIL_RETURN_UNEXPECTED(crop_width != nullptr, "crop_width is nullptr.");
101 *crop_width = w_in;
102 *crop_height = h_in;
103 CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "RandomCropAndResize: Width cannot be 0.");
104 CHECK_FAIL_RETURN_UNEXPECTED(h_in != 0, "RandomCropAndResize: Height cannot be 0.");
105 CHECK_FAIL_RETURN_UNEXPECTED(aspect_lb_ > 0, "RandomCropAndResize: aspect lower bound must be greater than zero.");
106 for (int32_t i = 0; i < max_iter_; i++) {
107 double const sample_scale = rnd_scale_(rnd_);
108 // In case of non-symmetrical aspect ratios, use uniform distribution on a logarithmic sample_scale.
109 // Note rnd_aspect_ is already a random distribution of the input aspect ratio in logarithmic sample_scale.
110 double const sample_aspect = exp(rnd_aspect_(rnd_));
111
112 CHECK_FAIL_RETURN_UNEXPECTED(
113 (std::numeric_limits<int32_t>::max() / h_in) > w_in,
114 "RandomCropAndResizeOp: multiplication out of bounds, check image width and image height first.");
115 CHECK_FAIL_RETURN_UNEXPECTED(
116 (std::numeric_limits<int32_t>::max() / h_in / w_in) > sample_scale,
117 "RandomCropAndResizeOp: multiplication out of bounds, check image width, image height and sample scale first.");
118 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / h_in / w_in / sample_scale) > sample_aspect,
119 "RandomCropAndResizeOp: multiplication out of bounds, check image width, image "
120 "height, sample scale and sample aspect first.");
121 *crop_width = static_cast<int32_t>(std::round(std::sqrt(h_in * w_in * sample_scale * sample_aspect)));
122 *crop_height = static_cast<int32_t>(std::round(*crop_width / sample_aspect));
123
124 // forbidden crop_width or crop_height is zero
125 if (*crop_width <= 0) {
126 *crop_width = 1;
127 }
128 if (*crop_height <= 0) {
129 *crop_height = 1;
130 }
131
132 if (*crop_width <= w_in && *crop_height <= h_in) {
133 std::uniform_int_distribution<> rd_x(0, w_in - *crop_width);
134 std::uniform_int_distribution<> rd_y(0, h_in - *crop_height);
135 *x = rd_x(rnd_);
136 *y = rd_y(rnd_);
137 return Status::OK();
138 }
139 }
140 double const img_aspect = static_cast<double>(w_in) / h_in;
141 if (img_aspect < aspect_lb_) {
142 *crop_width = w_in;
143 *crop_height = static_cast<int32_t>(std::round(*crop_width / static_cast<double>(aspect_lb_)));
144 } else {
145 if (img_aspect > aspect_ub_) {
146 *crop_height = h_in;
147 *crop_width = static_cast<int32_t>(std::round(*crop_height * static_cast<double>(aspect_ub_)));
148 } else {
149 *crop_width = w_in;
150 *crop_height = h_in;
151 }
152 }
153 constexpr float crop_ratio = 2.0;
154 // forbidden crop_width or crop_height is zero
155 if (*crop_width <= 0) {
156 *crop_width = 1;
157 }
158 if (*crop_height <= 0) {
159 *crop_height = 1;
160 }
161
162 *x = static_cast<int32_t>(std::round((w_in - *crop_width) / crop_ratio));
163 *y = static_cast<int32_t>(std::round((h_in - *crop_height) / crop_ratio));
164 return Status::OK();
165 }
166 } // namespace dataset
167 } // namespace mindspore
168