• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "album_op_android.h"  // NOLINT
18 #include <fstream>
19 #include <iomanip>
20 #include "minddata/dataset/core/tensor_shape.h"
21 #include "minddata/dataset/kernels/image/lite_image_utils.h"
22 #include "minddata/dataset/kernels/image/exif_utils.h"
23 
24 namespace mindspore {
25 namespace dataset {
26 
AlbumOp(const std::string & file_dir,bool do_decode,const std::string & schema_file,const std::vector<std::string> & column_names,const std::set<std::string> & exts)27 AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
28                  const std::vector<std::string> &column_names, const std::set<std::string> &exts)
29     : folder_path_(file_dir),
30       decode_(do_decode),
31       extensions_(exts),
32       schema_file_(schema_file),
33       row_cnt_(0),
34       buf_cnt_(0),
35       current_cnt_(0),
36       dirname_offset_(0),
37       sampler_(false),
38       sampler_index_(0),
39       rotate_(true),
40       column_names_(column_names) {
41   PrescanEntry();
42 }
43 
AlbumOp(const std::string & file_dir,bool do_decode,const std::string & schema_file,const std::vector<std::string> & column_names,const std::set<std::string> & exts,uint32_t index)44 AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
45                  const std::vector<std::string> &column_names, const std::set<std::string> &exts, uint32_t index)
46     : folder_path_(file_dir),
47       decode_(do_decode),
48       extensions_(exts),
49       schema_file_(schema_file),
50       row_cnt_(0),
51       buf_cnt_(0),
52       current_cnt_(0),
53       dirname_offset_(0),
54       sampler_(true),
55       sampler_index_(index),
56       rotate_(true),
57       column_names_(column_names) {
58   PrescanEntry();
59 }
60 
// Comparator used to order album file names by numeric value instead of lexicographically.
// The first character of each name is skipped (callers pass names that still carry the leading path separator)
// and the text up to and including the first "." is parsed as a base-10 integer; parsing stops at the first
// non-digit. Empty names and names without leading digits get the key 0.
// Returns true when the numeric key of "a" is strictly less than the numeric key of "b".
bool StrComp(const std::string &a, const std::string &b) {
  auto numeric_key = [](const std::string &name) -> int64_t {
    // substr(1, ...) throws std::out_of_range on an empty string, so handle that case explicitly.
    if (name.empty()) {
      return 0;
    }
    const std::string digits = name.substr(1, name.find('.'));
    // strtoll keeps the full 64-bit range; atoi only yields an int and is undefined on overflow.
    return static_cast<int64_t>(std::strtoll(digits.c_str(), nullptr, 10));
  };
  return numeric_key(a) < numeric_key(b);
}
71 
72 // Single thread to go through the folder directory and gets all file names
73 // calculate numRows then return
PrescanEntry()74 Status AlbumOp::PrescanEntry() {
75   data_schema_ = std::make_unique<DataSchema>();
76   Path schema_file(schema_file_);
77   if (schema_file_ == "" || !schema_file.Exists()) {
78     RETURN_STATUS_UNEXPECTED("Invalid file, schema_file is invalid or not set: " + schema_file_);
79   } else {
80     MS_LOG(INFO) << "Schema file provided: " << schema_file_ << ".";
81     data_schema_->LoadSchemaFile(schema_file_, columns_to_load_);
82   }
83 
84   Path folder(folder_path_);
85   dirname_offset_ = folder_path_.length();
86   std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder);
87   if (folder.Exists() == false || dirItr == nullptr) {
88     RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_);
89   }
90   MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << ".";
91 
92   while (dirItr->hasNext()) {
93     Path file = dirItr->next();
94     if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) {
95       (void)image_rows_.push_back(file.toString().substr(dirname_offset_));
96     } else {
97       MS_LOG(WARNING) << "Album operator unsupported file found: " << file.toString()
98                       << ", extension: " << file.Extension() << ".";
99     }
100   }
101 
102   std::sort(image_rows_.begin(), image_rows_.end(), StrComp);
103 
104   if (image_rows_.size() == 0) {
105     RETURN_STATUS_UNEXPECTED(
106       "Invalid data, no valid data matching the dataset API AlbumDataset. Please check file path or dataset API.");
107   }
108 
109   if (sampler_) {
110     if (sampler_index_ < 0 || sampler_index_ >= image_rows_.size()) {
111       RETURN_STATUS_UNEXPECTED("the sampler index was out of range");
112     }
113     std::vector<std::string> tmp;
114     tmp.emplace_back(image_rows_[sampler_index_]);
115     image_rows_.clear();
116     image_rows_ = tmp;
117   }
118 
119   return Status::OK();
120 }
121 
122 // contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_
123 // IMPORTANT: 1 IOBlock produces 1 DataBuffer
GetNextRow(std::unordered_map<std::string,std::shared_ptr<Tensor>> * map_row)124 bool AlbumOp::GetNextRow(std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
125   if (map_row == nullptr) {
126     MS_LOG(ERROR) << "GetNextRow in AlbumOp: the point of map_row is nullptr";
127     return false;
128   }
129 
130   if (current_cnt_ == image_rows_.size()) {
131     return false;
132   }
133 
134   Status ret = LoadTensorRow(current_cnt_, image_rows_[current_cnt_], map_row);
135   if (ret.IsError()) {
136     MS_LOG(ERROR) << "GetNextRow in AlbumOp: " << ret.ToString() << "\n";
137     return false;
138   }
139   current_cnt_++;
140   return true;
141 }
142 
143 // Only support JPEG/PNG/GIF/BMP
144 // Optimization: Could take in a tensor
145 // This function does not return status because we want to just skip bad input, not crash
CheckImageType(const std::string & file_name,bool * valid)146 bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
147   std::ifstream file_handle;
148   constexpr int read_num = 3;
149   *valid = false;
150   file_handle.open(file_name, std::ios::binary | std::ios::in);
151   if (!file_handle.is_open()) {
152     return false;
153   }
154   unsigned char file_type[read_num];
155   (void)file_handle.read(reinterpret_cast<char *>(file_type), read_num);
156 
157   if (file_handle.fail()) {
158     file_handle.close();
159     return false;
160   }
161   file_handle.close();
162   if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) {
163     // Normal JPEGs start with \xff\xd8\xff\xe0
164     // JPEG with EXIF stats with \xff\xd8\xff\xe1
165     // Use \xff\xd8\xff to cover both.
166     *valid = true;
167   }
168   return true;
169 }
170 
IsReadColumn(const std::string & column_name)171 bool AlbumOp::IsReadColumn(const std::string &column_name) {
172   for (uint32_t i = 0; i < this->column_names_.size(); i++) {
173     if (this->column_names_[i] == column_name) {
174       return true;
175     }
176   }
177   return false;
178 }
179 
LoadImageTensor(const std::string & image_file_path,int32_t col_num,TensorPtr * tensor)180 Status AlbumOp::LoadImageTensor(const std::string &image_file_path, int32_t col_num, TensorPtr *tensor) {
181   TensorPtr image;
182   TensorPtr rotate_tensor;
183   std::ifstream fs;
184   fs.open(image_file_path, std::ios::binary | std::ios::in);
185   if (fs.fail()) {
186     MS_LOG(WARNING) << "File not found:" << image_file_path << ".";
187     // If file doesn't exist, we don't flag this as error in input check, simply push back empty tensor
188     RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
189     return Status::OK();
190   }
191   // Hack logic to replace png images with empty tensor
192   Path file(image_file_path);
193   std::set<std::string> png_ext = {".png", ".PNG"};
194   if (png_ext.find(file.Extension()) != png_ext.end()) {
195     // load empty tensor since image is not jpg
196     MS_LOG(INFO) << "load empty tensor since image is PNG" << image_file_path << ".";
197     RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
198     return Status::OK();
199   }
200   // treat bin files separately
201   std::set<std::string> bin_ext = {".bin", ".BIN"};
202   if (bin_ext.find(file.Extension()) != bin_ext.end()) {
203     // load empty tensor since image is not jpg
204     MS_LOG(INFO) << "Bin file found" << image_file_path << ".";
205     RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, tensor));
206     return Status::OK();
207   }
208 
209   // check that the file is an image before decoding
210   bool valid = false;
211   bool check_success = CheckImageType(image_file_path, &valid);
212   if (!check_success || !valid) {
213     RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
214     return Status::OK();
215   }
216   // if it is a jpeg image, load and try to decode
217   RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
218   Status rc;
219   if (decode_ && valid) {
220     rc = Decode(image, tensor);
221     if (rc.IsError()) {
222       RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
223       return Status::OK();
224     }
225   }
226   return Status::OK();
227 }
228 
229 // get orientation from EXIF file
GetOrientation(const std::string & folder_path)230 int AlbumOp::GetOrientation(const std::string &folder_path) {
231   FILE *fp = fopen(folder_path.c_str(), "rb");
232   if (fp == nullptr) {
233     MS_LOG(ERROR) << "Can't read file for EXIF:  file = " << folder_path;
234     return 0;
235   }
236   fseek(fp, 0, SEEK_END);
237   int64_t fsize = ftell(fp);
238   rewind(fp);
239   if (fsize > INT_MAX) {
240     fclose(fp);
241     return 0;
242   }
243   unsigned char *buf = new unsigned char[fsize];
244   if (fread(buf, 1, fsize, fp) != fsize) {
245     MS_LOG(ERROR) << "read file size error for EXIF:  file = " << folder_path;
246     delete[] buf;
247     fclose(fp);
248     return 0;
249   }
250   fclose(fp);
251 
252   // Parse EXIF
253   mindspore::dataset::ExifInfo result;
254   int code = result.parseOrientation(buf, fsize);
255   delete[] buf;
256   MS_LOG(INFO) << "AlbumOp::GetOrientation:  orientation= " << code << ".";
257   return code;
258 }
259 
LoadStringArrayTensor(const nlohmann::json & json_obj,int32_t col_num,TensorPtr * tensor)260 Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
261   std::vector<std::string> data = json_obj.get<std::vector<std::string>>();
262 
263   MS_LOG(INFO) << "String array label found: " << data << ".";
264   RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor));
265   return Status::OK();
266 }
267 
LoadStringTensor(const nlohmann::json & json_obj,int32_t col_num,TensorPtr * tensor)268 Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
269   std::string data = json_obj;
270   // now we iterate over the elements in json
271 
272   MS_LOG(INFO) << "String label found: " << data << ".";
273   TensorPtr label;
274   RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(data, tensor));
275   return Status::OK();
276 }
277 
LoadIntArrayTensor(const nlohmann::json & json_obj,int32_t col_num,TensorPtr * tensor)278 Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
279   // consider templating this function to handle all ints
280   if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) {
281     std::vector<int64_t> data;
282 
283     // Iterate over the integer list and add those values to the output shape tensor
284     auto items = json_obj.items();
285     using it_type = decltype(items.begin());
286     (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); });
287 
288     RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor));
289   } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) {
290     std::vector<int32_t> data;
291 
292     // Iterate over the integer list and add those values to the output shape tensor
293     auto items = json_obj.items();
294     using it_type = decltype(items.begin());
295     (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); });
296 
297     RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor));
298   } else {
299     RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " +
300                              data_schema_->Column(col_num).Type().ToString());
301   }
302   return Status::OK();
303 }
304 
LoadFloatArrayTensor(const nlohmann::json & json_obj,int32_t col_num,TensorPtr * tensor)305 Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
306   // consider templating this function to handle all ints
307   if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) {
308     std::vector<double> data;
309 
310     // Iterate over the integer list and add those values to the output shape tensor
311     auto items = json_obj.items();
312     using it_type = decltype(items.begin());
313     (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); });
314 
315     RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor));
316   } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) {
317     std::vector<float> data;
318 
319     // Iterate over the integer list and add those values to the output shape tensor
320     auto items = json_obj.items();
321     using it_type = decltype(items.begin());
322     (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); });
323 
324     RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor));
325   } else {
326     RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " +
327                              data_schema_->Column(col_num).Type().ToString());
328   }
329   return Status::OK();
330 }
331 
LoadIDTensor(const std::string & file,int32_t col_num,TensorPtr * tensor)332 Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorPtr *tensor) {
333   if (data_schema_->Column(col_num).Type() == DataType::DE_STRING) {
334     RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, tensor));
335     return Status::OK();
336   }
337   // hack to get the file name without extension, the 1 is to get rid of the backslash character
338   int64_t image_id = std::atoi(file.substr(1, file.find(".")).c_str());
339   RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(image_id, tensor));
340   MS_LOG(INFO) << "File ID " << image_id << ".";
341   return Status::OK();
342 }
343 
LoadEmptyTensor(int32_t col_num,TensorPtr * tensor)344 Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorPtr *tensor) {
345   // hack to get the file name without extension, the 1 is to get rid of the backslash character
346   RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->Column(col_num).Type(), tensor));
347   return Status::OK();
348 }
349 
350 // Loads a tensor with float value, issue with float64, we don't have reverse look up to the type
351 // So we actually have to check what type we want to fill the tensor with.
352 // Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to
353 // only be float32, seems like a weird limitation to impose
LoadFloatTensor(const nlohmann::json & json_obj,int32_t col_num,TensorPtr * tensor)354 Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
355   if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) {
356     double data = json_obj;
357     MS_LOG(INFO) << "double found: " << json_obj << ".";
358     RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, tensor));
359   } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) {
360     float data = json_obj;
361     RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data, tensor));
362     MS_LOG(INFO) << "float found: " << json_obj << ".";
363   }
364   return Status::OK();
365 }
366 
367 // Loads a tensor with int value, we have to cast the value to type specified in the schema.
LoadIntTensor(const nlohmann::json & json_obj,int32_t col_num,TensorPtr * tensor)368 Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
369   if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) {
370     int64_t data = json_obj;
371     MS_LOG(INFO) << "int64 found: " << json_obj << ".";
372     RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(data, tensor));
373   } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) {
374     int32_t data = json_obj;
375     RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(data, tensor));
376     MS_LOG(INFO) << "int32 found: " << json_obj << ".";
377   }
378   return Status::OK();
379 }
380 
LoadIntTensorRowByIndex(int index,bool is_array,const nlohmann::json & column_value,std::unordered_map<std::string,std::shared_ptr<Tensor>> * map_row)381 Status AlbumOp::LoadIntTensorRowByIndex(int index, bool is_array, const nlohmann::json &column_value,
382                                         std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
383   int i = index;
384   // int value
385   if (!is_array &&
386       (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) {
387     TensorPtr tensor;
388     RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor));
389     (*map_row)[data_schema_->Column(i).Name()] = tensor;
390   }
391   // int array
392   if (is_array &&
393       (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) {
394     TensorPtr tensor;
395     RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor));
396     (*map_row)[data_schema_->Column(i).Name()] = tensor;
397   }
398   return Status::OK();
399 }
400 
LoadTensorRowByIndex(int index,const std::string & file,const nlohmann::json & js,std::unordered_map<std::string,std::shared_ptr<Tensor>> * map_row)401 Status AlbumOp::LoadTensorRowByIndex(int index, const std::string &file, const nlohmann::json &js,
402                                      std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
403   int i = index;
404   // special case to handle
405   if (data_schema_->Column(i).name() == "id") {
406     // id is internal, special case to load from file
407     TensorPtr tensor;
408     RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor));
409     (*map_row)[data_schema_->Column(i).Name()] = tensor;
410   }
411   // find if key does not exist, insert placeholder nullptr if not found
412   if (js.find(data_schema_->Column(i).Name()) == js.end()) {
413     // iterator not found, push nullptr as placeholder
414     MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->Column(i).Name() << ".";
415     TensorPtr tensor;
416     RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor));
417     (*map_row)[data_schema_->Column(i).Name()] = tensor;
418   }
419   nlohmann::json column_value = js.at(data_schema_->Column(i).Name());
420   MS_LOG(INFO) << "This column is: " << data_schema_->Column(i).Name() << ".";
421   bool is_array = column_value.is_array();
422   // load single string
423   if (column_value.is_string() && data_schema_->Column(i).Type() == DataType::DE_STRING) {
424     TensorPtr tensor;
425     RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor));
426     (*map_row)[data_schema_->Column(i).Name()] = tensor;
427   }
428   // load string array
429   if (is_array && data_schema_->Column(i).Type() == DataType::DE_STRING) {
430     TensorPtr tensor;
431     RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor));
432     (*map_row)[data_schema_->Column(i).Name()] = tensor;
433   }
434   // load image file
435   if (column_value.is_string() && data_schema_->Column(i).Type() != DataType::DE_STRING) {
436     std::string image_file_path = column_value;
437     TensorPtr tensor;
438     RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor));
439     (*map_row)[data_schema_->Column(i).Name()] = tensor;
440     uint32_t orientation = GetOrientation(image_file_path);
441     TensorPtr scalar_tensor;
442     RETURN_IF_NOT_OK(Tensor::CreateScalar<uint32_t>(orientation, &scalar_tensor));
443     (*map_row)["orientation"] = scalar_tensor;
444   }
445   // load float value
446   if (!is_array && (data_schema_->Column(i).Type() == DataType::DE_FLOAT32 ||
447                     data_schema_->Column(i).Type() == DataType::DE_FLOAT64)) {
448     TensorPtr tensor;
449     RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor));
450     (*map_row)[data_schema_->Column(i).Name()] = tensor;
451   }
452   // load float array
453   if (is_array && (data_schema_->Column(i).Type() == DataType::DE_FLOAT32 ||
454                    data_schema_->Column(i).Type() == DataType::DE_FLOAT64)) {
455     TensorPtr tensor;
456     RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor));
457     (*map_row)[data_schema_->Column(i).Name()] = tensor;
458   }
459 
460   RETURN_IF_NOT_OK(LoadIntTensorRowByIndex(i, is_array, column_value, map_row));
461   return Status::OK();
462 }
463 
464 // Load 1 TensorRow (image,label) using 1 ImageColumns. 1 function call produces 1 TensorRow in a DataBuffer
465 // possible optimization: the helper functions of LoadTensorRow should be optimized
466 // to take a reference to a column descriptor?
467 // the design of this class is to make the code more readable, forgoing minor performance gain like
468 // getting rid of duplicated checks
LoadTensorRow(row_id_type row_id,const std::string & file,std::unordered_map<std::string,std::shared_ptr<Tensor>> * map_row)469 Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file,
470                               std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
471   // testing here is to just print out file path
472   MS_LOG(INFO) << "Image row file: " << file << ".";
473 
474   std::ifstream file_handle(folder_path_ + file);
475   if (!file_handle.is_open()) {
476     RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + folder_path_ + file);
477   }
478   std::string line;
479   while (getline(file_handle, line)) {
480     try {
481       nlohmann::json js = nlohmann::json::parse(line);
482       MS_LOG(INFO) << "This Line: " << line << ".";
483 
484       // note if take a schema here, then we have to iterate over all column descriptors in schema and check for key
485       // get columns in schema:
486       int32_t columns = data_schema_->NumColumns();
487 
488       // loop over each column descriptor, this can optimized by switch cases
489       for (int32_t i = 0; i < columns; i++) {
490         if (!IsReadColumn(data_schema_->Column(i).Name())) {
491           continue;
492         }
493         RETURN_IF_NOT_OK(LoadTensorRowByIndex(i, file, js, map_row));
494       }
495     } catch (const std::exception &err) {
496       file_handle.close();
497       RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + folder_path_ + file);
498     }
499   }
500   file_handle.close();
501   return Status::OK();
502 }
503 }  // namespace dataset
504 }  // namespace mindspore
505