1 /** 2 * Copyright 2019-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "common.h" 17 #include <algorithm> 18 #include <fstream> 19 #include <string> 20 #include <vector> 21 #include "minddata/dataset/core/client.h" 22 #include "minddata/dataset/core/config_manager.h" 23 #include "minddata/dataset/core/pybind_support.h" 24 #include "minddata/dataset/core/tensor.h" 25 #include "minddata/dataset/core/tensor_shape.h" 26 #include "minddata/dataset/engine/datasetops/batch_op.h" 27 #include "minddata/dataset/engine/datasetops/repeat_op.h" 28 #include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" 29 30 namespace UT { 31 #ifdef __cplusplus 32 #if __cplusplus 33 extern "C" { 34 #endif 35 #endif 36 37 void DatasetOpTesting::SetUp() { 38 std::string install_home = "data/dataset"; 39 datasets_root_path_ = install_home; 40 mindrecord_root_path_ = "data/mindrecord"; 41 } 42 43 std::vector<mindspore::dataset::TensorShape> DatasetOpTesting::ToTensorShapeVec( 44 const std::vector<std::vector<int64_t>> &v) { 45 std::vector<mindspore::dataset::TensorShape> ret_v; 46 std::transform(v.begin(), v.end(), std::back_inserter(ret_v), 47 [](const auto &s) { return mindspore::dataset::TensorShape(s); }); 48 return ret_v; 49 } 50 51 std::vector<mindspore::dataset::DataType> DatasetOpTesting::ToDETypes(const std::vector<mindspore::DataType> &t) { 52 std::vector<mindspore::dataset::DataType> ret_t; 53 std::transform(t.begin(), t.end(), std::back_inserter(ret_t), [](const mindspore::DataType &t) { 54 return mindspore::dataset::MSTypeToDEType(static_cast<mindspore::TypeId>(t)); 55 }); 56 return ret_t; 57 } 58 59 // Function to read a file into an MSTensor 60 // Note: This provides the analogous support for DETensor's CreateFromFile. 61 mindspore::MSTensor DatasetOpTesting::ReadFileToTensor(const std::string &file) { 62 if (file.empty()) { 63 MS_LOG(ERROR) << "Pointer file is nullptr; return an empty Tensor."; 64 return mindspore::MSTensor(); 65 } 66 std::ifstream ifs(file); 67 if (!ifs.good()) { 68 MS_LOG(ERROR) << "File: " << file << " does not exist; return an empty Tensor."; 69 return mindspore::MSTensor(); 70 } 71 if (!ifs.is_open()) { 72 MS_LOG(ERROR) << "File: " << file << " open failed; return an empty Tensor."; 73 return mindspore::MSTensor(); 74 } 75 76 ifs.seekg(0, std::ios::end); 77 size_t size = ifs.tellg(); 78 mindspore::MSTensor buf("file", mindspore::DataType::kNumberTypeUInt8, {static_cast<int64_t>(size)}, nullptr, size); 79 80 ifs.seekg(0, std::ios::beg); 81 ifs.read(reinterpret_cast<char *>(buf.MutableData()), size); 82 ifs.close(); 83 84 return buf; 85 } 86 87 // Helper function to create a batch op 88 std::shared_ptr<mindspore::dataset::BatchOp> DatasetOpTesting::Batch(int32_t batch_size, bool drop, 89 mindspore::dataset::PadInfo pad_map) { 90 /* 91 std::shared_ptr<mindspore::dataset::ConfigManager> cfg = mindspore::dataset::GlobalContext::config_manager(); 92 int32_t num_workers = cfg->num_parallel_workers(); 93 int32_t op_connector_size = cfg->op_connector_size(); 94 std::vector<std::string> output_columns = {}; 95 std::vector<std::string> input_columns = {}; 96 mindspore::dataset::py::function batch_size_func; 97 mindspore::dataset::py::function batch_map_func; 98 bool pad = false; 99 if (!pad_map.empty()) { 100 pad = true; 101 } 102 std::shared_ptr<mindspore::dataset::BatchOp> op = 103 std::make_shared<mindspore::dataset::BatchOp>(batch_size, drop, pad, op_connector_size, num_workers, input_columns, 104 output_columns, batch_size_func, batch_map_func, pad_map); return op; 105 */ 106 Status rc; 107 std::shared_ptr<mindspore::dataset::BatchOp> op; 108 rc = mindspore::dataset::BatchOp::Builder(batch_size).SetDrop(drop).SetPaddingMap(pad_map).Build(&op); 109 EXPECT_TRUE(rc.IsOk()); 110 return std::move(op); 111 } 112 113 std::shared_ptr<mindspore::dataset::RepeatOp> DatasetOpTesting::Repeat(int repeat_cnt) { 114 std::shared_ptr<mindspore::dataset::RepeatOp> op = std::make_shared<mindspore::dataset::RepeatOp>(repeat_cnt); 115 return std::move(op); 116 } 117 118 std::shared_ptr<mindspore::dataset::TFReaderOp> DatasetOpTesting::TFReader(std::string file, int num_works) { 119 std::shared_ptr<mindspore::dataset::ConfigManager> config_manager = 120 mindspore::dataset::GlobalContext::config_manager(); 121 auto op_connector_size = config_manager->op_connector_size(); 122 auto worker_connector_size = config_manager->worker_connector_size(); 123 std::vector<std::string> columns_to_load = {}; 124 std::vector<std::string> files = {file}; 125 std::shared_ptr<mindspore::dataset::TFReaderOp> so = std::make_shared<mindspore::dataset::TFReaderOp>( 126 num_works, worker_connector_size, 0, files, std::make_unique<mindspore::dataset::DataSchema>(), op_connector_size, 127 columns_to_load, false, 1, 0, false); 128 (void)so->Init(); 129 return std::move(so); 130 } 131 132 std::shared_ptr<mindspore::dataset::ExecutionTree> DatasetOpTesting::Build( 133 std::vector<std::shared_ptr<mindspore::dataset::DatasetOp>> ops) { 134 std::shared_ptr<mindspore::dataset::ExecutionTree> tree = std::make_shared<mindspore::dataset::ExecutionTree>(); 135 for (int i = 0; i < ops.size(); i++) { 136 tree->AssociateNode(ops[i]); 137 if (i > 0) { 138 ops[i]->AddChild(std::move(ops[i - 1])); 139 } 140 if (i == ops.size() - 1) { 141 tree->AssignRoot(ops[i]); 142 } 143 } 144 return std::move(tree); 145 } 146 147 #ifdef __cplusplus 148 #if __cplusplus 149 } 150 #endif 151 #endif 152 } // namespace UT 153