1 /**
2 * Copyright 2020-2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "minddata/dataset/kernels/image/random_crop_op.h"
17
18 #include "minddata/dataset/kernels/data/data_utils.h"
19 #include "minddata/dataset/kernels/image/image_utils.h"
20 #include "minddata/dataset/util/random.h"
21 #include "minddata/dataset/util/status.h"
22
23 namespace mindspore {
24 namespace dataset {
RandomCropOp(int32_t crop_height,int32_t crop_width,int32_t pad_top,int32_t pad_bottom,int32_t pad_left,int32_t pad_right,bool pad_if_needed,BorderType padding_mode,uint8_t fill_r,uint8_t fill_g,uint8_t fill_b)25 RandomCropOp::RandomCropOp(int32_t crop_height, int32_t crop_width, int32_t pad_top, int32_t pad_bottom,
26 int32_t pad_left, int32_t pad_right, bool pad_if_needed, BorderType padding_mode,
27 uint8_t fill_r, uint8_t fill_g, uint8_t fill_b)
28 : crop_height_(crop_height),
29 crop_width_(crop_width),
30 pad_top_(pad_top),
31 pad_bottom_(pad_bottom),
32 pad_left_(pad_left),
33 pad_right_(pad_right),
34 pad_if_needed_(pad_if_needed),
35 border_type_(padding_mode),
36 fill_r_(fill_r),
37 fill_g_(fill_g),
38 fill_b_(fill_b) {}
39
ImagePadding(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * pad_image,int32_t * t_pad_top,int32_t * t_pad_bottom,int32_t * t_pad_left,int32_t * t_pad_right,int32_t * padded_image_w,int32_t * padded_image_h,bool * crop_further)40 Status RandomCropOp::ImagePadding(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *pad_image,
41 int32_t *t_pad_top, int32_t *t_pad_bottom, int32_t *t_pad_left, int32_t *t_pad_right,
42 int32_t *padded_image_w, int32_t *padded_image_h, bool *crop_further) {
43 *t_pad_top = pad_top_;
44 *t_pad_bottom = pad_bottom_;
45 *t_pad_left = pad_left_;
46 *t_pad_right = pad_right_;
47
48 constexpr int64_t max_ratio = 3;
49 CHECK_FAIL_RETURN_UNEXPECTED(
50 pad_top_ < input->shape()[0] * max_ratio && pad_bottom_ < input->shape()[0] * max_ratio &&
51 pad_left_ < input->shape()[1] * max_ratio && pad_right_ < input->shape()[1] * max_ratio,
52 "RandomCrop: padding size is three times bigger than the image size, padding top: " + std::to_string(pad_top_) +
53 ", padding bottom: " + std::to_string(pad_bottom_) + ", padding pad_left_: " + std::to_string(pad_left_) +
54 ", padding padding right:" + std::to_string(pad_right_) + ", image shape: " + std::to_string(input->shape()[0]) +
55 ", " + std::to_string(input->shape()[1]));
56
57 RETURN_IF_NOT_OK(
58 Pad(input, pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, border_type_, fill_r_, fill_g_, fill_b_));
59 CHECK_FAIL_RETURN_UNEXPECTED(
60 (*pad_image)->shape().Size() >= 2,
61 "RandomCrop: invalid shape of image after pad, got rank: " + std::to_string((*pad_image)->shape().Size()));
62
63 *padded_image_h = static_cast<int32_t>((*pad_image)->shape()[0]);
64 *padded_image_w = static_cast<int32_t>((*pad_image)->shape()[1]);
65
66 if (*padded_image_h == crop_height_ && *padded_image_w == crop_width_) {
67 *crop_further = false; // no need for further crop
68 return Status::OK();
69 } else if (pad_if_needed_) {
70 // check the dimensions of the image for padding, if we do need padding, then we change the pad values
71 if (*padded_image_h < crop_height_) {
72 RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, crop_height_ - *padded_image_h, crop_height_ - *padded_image_h, 0, 0,
73 border_type_, fill_r_, fill_g_, fill_b_));
74
75 // update pad total above/below
76 t_pad_top += ((ptrdiff_t)crop_height_ - *padded_image_h);
77 t_pad_bottom += ((ptrdiff_t)crop_height_ - *padded_image_h);
78 }
79 if (*padded_image_w < crop_width_) {
80 RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, 0, 0, crop_width_ - *padded_image_w, crop_width_ - *padded_image_w,
81 border_type_, fill_r_, fill_g_, fill_b_));
82 // update pad total left/right
83 t_pad_left += ((ptrdiff_t)crop_width_ - *padded_image_w);
84 t_pad_right += ((ptrdiff_t)crop_width_ - *padded_image_w);
85 }
86 *padded_image_h = static_cast<int32_t>((*pad_image)->shape()[0]);
87 *padded_image_w = static_cast<int32_t>((*pad_image)->shape()[1]);
88 }
89
90 if (crop_height_ == 0 || crop_width_ == 0) {
91 RETURN_STATUS_ERROR(StatusCode::kMDShapeMisMatch,
92 "RandomCrop: invalid crop size, crop width or crop height is not allowed to be zero.");
93 }
94 if (*padded_image_h < crop_height_ || *padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) {
95 RETURN_STATUS_ERROR(StatusCode::kMDShapeMisMatch,
96 "RandomCrop: invalid crop size, crop size is bigger than the image dimensions, "
97 "got crop height: " +
98 std::to_string(crop_height_) + ", crop width: " + std::to_string(crop_width_));
99 }
100 return Status::OK();
101 }
102
GenRandomXY(int32_t * x,int32_t * y,int32_t padded_image_w,int32_t padded_image_h)103 void RandomCropOp::GenRandomXY(int32_t *x, int32_t *y, int32_t padded_image_w, int32_t padded_image_h) {
104 // GenCropPoints for cropping
105 *x = std::uniform_int_distribution<int>(0, padded_image_w - crop_width_)(random_generator_);
106 *y = std::uniform_int_distribution<int>(0, padded_image_h - crop_height_)(random_generator_);
107 }
108
RandomCropImg(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t * x,int32_t * y,int32_t index)109 Status RandomCropOp::RandomCropImg(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t *x,
110 int32_t *y, int32_t index) {
111 std::shared_ptr<Tensor> pad_image = nullptr;
112 int32_t t_pad_top = 0;
113 int32_t t_pad_bottom = 0;
114 int32_t t_pad_left = 0;
115 int32_t t_pad_right = 0;
116 int32_t padded_image_w = 0;
117 int32_t padded_image_h = 0;
118 bool crop_further = true; // whether image needs further cropping based on new size & requirements
119
120 RETURN_IF_NOT_OK( // error code sent back directly
121 ImagePadding(input, &pad_image, &t_pad_top, &t_pad_bottom, &t_pad_left, &t_pad_right, &padded_image_w,
122 &padded_image_h, &crop_further));
123 if (!crop_further) {
124 *output = pad_image;
125 return Status::OK();
126 }
127 if (index == 0) {
128 GenRandomXY(x, y, padded_image_w, padded_image_h);
129 }
130 RETURN_IF_NOT_OK(Crop(pad_image, output, *x, *y, crop_width_, crop_height_));
131
132 return Status::OK();
133 }
134
ConstructShape(const TensorShape & in_shape,std::shared_ptr<TensorShape> * out_shape) const135 Status RandomCropOp::ConstructShape(const TensorShape &in_shape, std::shared_ptr<TensorShape> *out_shape) const {
136 auto in_shape_vec = in_shape.AsVector();
137 const int h_index = -3;
138 const int w_index = -2;
139 in_shape_vec[in_shape_vec.size() + h_index] = crop_height_;
140 in_shape_vec[in_shape_vec.size() + w_index] = crop_width_;
141
142 *out_shape = std::make_shared<TensorShape>(in_shape_vec);
143
144 return Status::OK();
145 }
146
Compute(const TensorRow & input,TensorRow * output)147 Status RandomCropOp::Compute(const TensorRow &input, TensorRow *output) {
148 IO_CHECK_VECTOR(input, output);
149
150 for (const auto &image : input) {
151 if (image->shape().Rank() < kMinImageRank) {
152 std::string err_msg =
153 "RandomCropOp: input tensor should have at least 2 dimensions, but got: " + std::to_string(image->Rank());
154 RETURN_STATUS_UNEXPECTED(err_msg);
155 }
156 }
157
158 if (input.size() > 1) {
159 for (size_t i = 0; i < input.size() - 1; i++) {
160 if (input[i]->shape()[0] != input[i + 1]->shape()[0] || input[i]->shape()[1] != input[i + 1]->shape()[1]) {
161 RETURN_STATUS_UNEXPECTED(
162 "RandomCrop: Input images in different column must have the same shape, check the output shape in "
163 "specified 'input_columns' before call this operation.");
164 }
165 }
166 }
167
168 const auto output_count = input.size();
169 output->resize(output_count);
170 int32_t x = 0;
171 int32_t y = 0;
172 for (size_t i = 0; i < input.size(); i++) {
173 if (input[i]->shape().Rank() <= kDefaultImageRank) { // keep original logic untained
174 RETURN_IF_NOT_OK(RandomCropImg(input[i], &(*output)[i], &x, &y, i));
175 } else { // deal with videos
176 // reshape input to hwc
177 auto input_shape = input[i]->shape();
178 dsize_t num_batch = input[i]->Size() / (input_shape[-3] * input_shape[-2] * input_shape[-1]);
179 TensorShape new_shape({num_batch, input_shape[-3], input_shape[-2], input_shape[-1]});
180 RETURN_IF_NOT_OK(input[i]->Reshape(new_shape));
181
182 // split [N, H, W, C] to N [H, W, C], and center crop N [H, W, C]
183 std::vector<std::shared_ptr<Tensor>> input_vector_hwc, output_vector_hwc;
184 RETURN_IF_NOT_OK(BatchTensorToTensorVector(input[i], &input_vector_hwc));
185
186 // perform randomCrop
187 for (int32_t idx = 0; idx < num_batch; idx++) {
188 std::shared_ptr<Tensor> random_crop;
189 RETURN_IF_NOT_OK(RandomCropImg(input_vector_hwc[idx], &random_crop, &x, &y, i));
190 output_vector_hwc.push_back(random_crop);
191 }
192
193 // integrate N [H, W, C] to [N, H, W, C], and reshape [..., H, W, C]
194 RETURN_IF_NOT_OK(TensorVectorToBatchTensor(output_vector_hwc, &(*output)[i]));
195
196 // reshape output before return, only height and width are changed
197 std::shared_ptr<TensorShape> output_shape_new;
198 RETURN_IF_NOT_OK(ConstructShape(input_shape, &output_shape_new));
199 RETURN_IF_NOT_OK((*output)[i]->Reshape(*output_shape_new));
200 }
201 }
202
203 return Status::OK();
204 }
205
OutputShape(const std::vector<TensorShape> & inputs,std::vector<TensorShape> & outputs)206 Status RandomCropOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
207 RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
208 outputs.clear();
209 TensorShape out = TensorShape{crop_height_, crop_width_};
210 if (inputs[0].Rank() == kMinImageRank) {
211 (void)outputs.emplace_back(out);
212 } else if (inputs[0].Rank() == kDefaultImageRank) {
213 (void)outputs.emplace_back(out.AppendDim(inputs[0][kChannelIndexHWC]));
214 } else if (inputs[0].Rank() > kDefaultImageRank) {
215 std::shared_ptr<TensorShape> output_shape_new;
216 RETURN_IF_NOT_OK(ConstructShape(inputs[0], &output_shape_new));
217 (void)outputs.emplace_back(*output_shape_new);
218 }
219 if (!outputs.empty()) {
220 return Status::OK();
221 }
222 RETURN_STATUS_UNEXPECTED("RandomCrop: invalid input shape, expected 2D or 3D input, but got input dimension is:" +
223 std::to_string(inputs[0].Rank()));
224 }
225 } // namespace dataset
226 } // namespace mindspore
227