• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/kernels/image/random_crop_op.h"
17 
18 #include "minddata/dataset/kernels/data/data_utils.h"
19 #include "minddata/dataset/kernels/image/image_utils.h"
20 #include "minddata/dataset/util/random.h"
21 #include "minddata/dataset/util/status.h"
22 
23 namespace mindspore {
24 namespace dataset {
RandomCropOp(int32_t crop_height,int32_t crop_width,int32_t pad_top,int32_t pad_bottom,int32_t pad_left,int32_t pad_right,bool pad_if_needed,BorderType padding_mode,uint8_t fill_r,uint8_t fill_g,uint8_t fill_b)25 RandomCropOp::RandomCropOp(int32_t crop_height, int32_t crop_width, int32_t pad_top, int32_t pad_bottom,
26                            int32_t pad_left, int32_t pad_right, bool pad_if_needed, BorderType padding_mode,
27                            uint8_t fill_r, uint8_t fill_g, uint8_t fill_b)
28     : crop_height_(crop_height),
29       crop_width_(crop_width),
30       pad_top_(pad_top),
31       pad_bottom_(pad_bottom),
32       pad_left_(pad_left),
33       pad_right_(pad_right),
34       pad_if_needed_(pad_if_needed),
35       border_type_(padding_mode),
36       fill_r_(fill_r),
37       fill_g_(fill_g),
38       fill_b_(fill_b) {}
39 
ImagePadding(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * pad_image,int32_t * t_pad_top,int32_t * t_pad_bottom,int32_t * t_pad_left,int32_t * t_pad_right,int32_t * padded_image_w,int32_t * padded_image_h,bool * crop_further)40 Status RandomCropOp::ImagePadding(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *pad_image,
41                                   int32_t *t_pad_top, int32_t *t_pad_bottom, int32_t *t_pad_left, int32_t *t_pad_right,
42                                   int32_t *padded_image_w, int32_t *padded_image_h, bool *crop_further) {
43   *t_pad_top = pad_top_;
44   *t_pad_bottom = pad_bottom_;
45   *t_pad_left = pad_left_;
46   *t_pad_right = pad_right_;
47 
48   constexpr int64_t max_ratio = 3;
49   CHECK_FAIL_RETURN_UNEXPECTED(
50     pad_top_ < input->shape()[0] * max_ratio && pad_bottom_ < input->shape()[0] * max_ratio &&
51       pad_left_ < input->shape()[1] * max_ratio && pad_right_ < input->shape()[1] * max_ratio,
52     "RandomCrop: padding size is three times bigger than the image size, padding top: " + std::to_string(pad_top_) +
53       ", padding bottom: " + std::to_string(pad_bottom_) + ", padding pad_left_: " + std::to_string(pad_left_) +
54       ", padding padding right:" + std::to_string(pad_right_) + ", image shape: " + std::to_string(input->shape()[0]) +
55       ", " + std::to_string(input->shape()[1]));
56 
57   RETURN_IF_NOT_OK(
58     Pad(input, pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, border_type_, fill_r_, fill_g_, fill_b_));
59   CHECK_FAIL_RETURN_UNEXPECTED(
60     (*pad_image)->shape().Size() >= 2,
61     "RandomCrop: invalid shape of image after pad, got rank: " + std::to_string((*pad_image)->shape().Size()));
62 
63   *padded_image_h = static_cast<int32_t>((*pad_image)->shape()[0]);
64   *padded_image_w = static_cast<int32_t>((*pad_image)->shape()[1]);
65 
66   if (*padded_image_h == crop_height_ && *padded_image_w == crop_width_) {
67     *crop_further = false;  //  no need for further crop
68     return Status::OK();
69   } else if (pad_if_needed_) {
70     // check the dimensions of the image for padding, if we do need padding, then we change the pad values
71     if (*padded_image_h < crop_height_) {
72       RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, crop_height_ - *padded_image_h, crop_height_ - *padded_image_h, 0, 0,
73                            border_type_, fill_r_, fill_g_, fill_b_));
74 
75       // update pad total above/below
76       t_pad_top += ((ptrdiff_t)crop_height_ - *padded_image_h);
77       t_pad_bottom += ((ptrdiff_t)crop_height_ - *padded_image_h);
78     }
79     if (*padded_image_w < crop_width_) {
80       RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, 0, 0, crop_width_ - *padded_image_w, crop_width_ - *padded_image_w,
81                            border_type_, fill_r_, fill_g_, fill_b_));
82       // update pad total left/right
83       t_pad_left += ((ptrdiff_t)crop_width_ - *padded_image_w);
84       t_pad_right += ((ptrdiff_t)crop_width_ - *padded_image_w);
85     }
86     *padded_image_h = static_cast<int32_t>((*pad_image)->shape()[0]);
87     *padded_image_w = static_cast<int32_t>((*pad_image)->shape()[1]);
88   }
89 
90   if (crop_height_ == 0 || crop_width_ == 0) {
91     RETURN_STATUS_ERROR(StatusCode::kMDShapeMisMatch,
92                         "RandomCrop: invalid crop size, crop width or crop height is not allowed to be zero.");
93   }
94   if (*padded_image_h < crop_height_ || *padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) {
95     RETURN_STATUS_ERROR(StatusCode::kMDShapeMisMatch,
96                         "RandomCrop: invalid crop size, crop size is bigger than the image dimensions, "
97                         "got crop height: " +
98                           std::to_string(crop_height_) + ", crop width: " + std::to_string(crop_width_));
99   }
100   return Status::OK();
101 }
102 
GenRandomXY(int32_t * x,int32_t * y,int32_t padded_image_w,int32_t padded_image_h)103 void RandomCropOp::GenRandomXY(int32_t *x, int32_t *y, int32_t padded_image_w, int32_t padded_image_h) {
104   // GenCropPoints for cropping
105   *x = std::uniform_int_distribution<int>(0, padded_image_w - crop_width_)(random_generator_);
106   *y = std::uniform_int_distribution<int>(0, padded_image_h - crop_height_)(random_generator_);
107 }
108 
RandomCropImg(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t * x,int32_t * y,int32_t index)109 Status RandomCropOp::RandomCropImg(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t *x,
110                                    int32_t *y, int32_t index) {
111   std::shared_ptr<Tensor> pad_image = nullptr;
112   int32_t t_pad_top = 0;
113   int32_t t_pad_bottom = 0;
114   int32_t t_pad_left = 0;
115   int32_t t_pad_right = 0;
116   int32_t padded_image_w = 0;
117   int32_t padded_image_h = 0;
118   bool crop_further = true;  // whether image needs further cropping based on new size & requirements
119 
120   RETURN_IF_NOT_OK(  // error code sent back directly
121     ImagePadding(input, &pad_image, &t_pad_top, &t_pad_bottom, &t_pad_left, &t_pad_right, &padded_image_w,
122                  &padded_image_h, &crop_further));
123   if (!crop_further) {
124     *output = pad_image;
125     return Status::OK();
126   }
127   if (index == 0) {
128     GenRandomXY(x, y, padded_image_w, padded_image_h);
129   }
130   RETURN_IF_NOT_OK(Crop(pad_image, output, *x, *y, crop_width_, crop_height_));
131 
132   return Status::OK();
133 }
134 
ConstructShape(const TensorShape & in_shape,std::shared_ptr<TensorShape> * out_shape) const135 Status RandomCropOp::ConstructShape(const TensorShape &in_shape, std::shared_ptr<TensorShape> *out_shape) const {
136   auto in_shape_vec = in_shape.AsVector();
137   const int h_index = -3;
138   const int w_index = -2;
139   in_shape_vec[in_shape_vec.size() + h_index] = crop_height_;
140   in_shape_vec[in_shape_vec.size() + w_index] = crop_width_;
141 
142   *out_shape = std::make_shared<TensorShape>(in_shape_vec);
143 
144   return Status::OK();
145 }
146 
Compute(const TensorRow & input,TensorRow * output)147 Status RandomCropOp::Compute(const TensorRow &input, TensorRow *output) {
148   IO_CHECK_VECTOR(input, output);
149 
150   for (const auto &image : input) {
151     if (image->shape().Rank() < kMinImageRank) {
152       std::string err_msg =
153         "RandomCropOp: input tensor should have at least 2 dimensions, but got: " + std::to_string(image->Rank());
154       RETURN_STATUS_UNEXPECTED(err_msg);
155     }
156   }
157 
158   if (input.size() > 1) {
159     for (size_t i = 0; i < input.size() - 1; i++) {
160       if (input[i]->shape()[0] != input[i + 1]->shape()[0] || input[i]->shape()[1] != input[i + 1]->shape()[1]) {
161         RETURN_STATUS_UNEXPECTED(
162           "RandomCrop: Input images in different column must have the same shape, check the output shape in "
163           "specified 'input_columns' before call this operation.");
164       }
165     }
166   }
167 
168   const auto output_count = input.size();
169   output->resize(output_count);
170   int32_t x = 0;
171   int32_t y = 0;
172   for (size_t i = 0; i < input.size(); i++) {
173     if (input[i]->shape().Rank() <= kDefaultImageRank) {  // keep original logic untained
174       RETURN_IF_NOT_OK(RandomCropImg(input[i], &(*output)[i], &x, &y, i));
175     } else {  // deal with videos
176       // reshape input to hwc
177       auto input_shape = input[i]->shape();
178       dsize_t num_batch = input[i]->Size() / (input_shape[-3] * input_shape[-2] * input_shape[-1]);
179       TensorShape new_shape({num_batch, input_shape[-3], input_shape[-2], input_shape[-1]});
180       RETURN_IF_NOT_OK(input[i]->Reshape(new_shape));
181 
182       // split [N, H, W, C] to N [H, W, C], and center crop N [H, W, C]
183       std::vector<std::shared_ptr<Tensor>> input_vector_hwc, output_vector_hwc;
184       RETURN_IF_NOT_OK(BatchTensorToTensorVector(input[i], &input_vector_hwc));
185 
186       // perform randomCrop
187       for (int32_t idx = 0; idx < num_batch; idx++) {
188         std::shared_ptr<Tensor> random_crop;
189         RETURN_IF_NOT_OK(RandomCropImg(input_vector_hwc[idx], &random_crop, &x, &y, i));
190         output_vector_hwc.push_back(random_crop);
191       }
192 
193       // integrate N [H, W, C] to [N, H, W, C], and reshape [..., H, W, C]
194       RETURN_IF_NOT_OK(TensorVectorToBatchTensor(output_vector_hwc, &(*output)[i]));
195 
196       // reshape output before return, only height and width are changed
197       std::shared_ptr<TensorShape> output_shape_new;
198       RETURN_IF_NOT_OK(ConstructShape(input_shape, &output_shape_new));
199       RETURN_IF_NOT_OK((*output)[i]->Reshape(*output_shape_new));
200     }
201   }
202 
203   return Status::OK();
204 }
205 
OutputShape(const std::vector<TensorShape> & inputs,std::vector<TensorShape> & outputs)206 Status RandomCropOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
207   RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
208   outputs.clear();
209   TensorShape out = TensorShape{crop_height_, crop_width_};
210   if (inputs[0].Rank() == kMinImageRank) {
211     (void)outputs.emplace_back(out);
212   } else if (inputs[0].Rank() == kDefaultImageRank) {
213     (void)outputs.emplace_back(out.AppendDim(inputs[0][kChannelIndexHWC]));
214   } else if (inputs[0].Rank() > kDefaultImageRank) {
215     std::shared_ptr<TensorShape> output_shape_new;
216     RETURN_IF_NOT_OK(ConstructShape(inputs[0], &output_shape_new));
217     (void)outputs.emplace_back(*output_shape_new);
218   }
219   if (!outputs.empty()) {
220     return Status::OK();
221   }
222   RETURN_STATUS_UNEXPECTED("RandomCrop: invalid input shape, expected 2D or 3D input, but got input dimension is:" +
223                            std::to_string(inputs[0].Rank()));
224 }
225 }  // namespace dataset
226 }  // namespace mindspore
227