1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "minddata/dataset/api/python/pybind_conversion.h"
17
18 namespace mindspore {
19 namespace dataset {
toFloat(const py::handle & handle)20 float toFloat(const py::handle &handle) { return py::reinterpret_borrow<py::float_>(handle); }
21
toInt(const py::handle & handle)22 int toInt(const py::handle &handle) { return py::reinterpret_borrow<py::int_>(handle); }
23
toInt64(const py::handle & handle)24 int64_t toInt64(const py::handle &handle) { return py::reinterpret_borrow<py::int_>(handle); }
25
toBool(const py::handle & handle)26 bool toBool(const py::handle &handle) { return py::reinterpret_borrow<py::bool_>(handle); }
27
toString(const py::handle & handle)28 std::string toString(const py::handle &handle) { return py::reinterpret_borrow<py::str>(handle); }
29
toStringSet(const py::list list)30 std::set<std::string> toStringSet(const py::list list) {
31 std::set<std::string> set;
32 if (!list.empty()) {
33 for (auto l : list) {
34 if (!l.is_none()) {
35 (void)set.insert(py::str(l));
36 }
37 }
38 }
39 return set;
40 }
41
toStringMap(const py::dict dict)42 std::map<std::string, int32_t> toStringMap(const py::dict dict) {
43 std::map<std::string, int32_t> map;
44 if (!dict.empty()) {
45 for (auto p : dict) {
46 (void)map.emplace(toString(p.first), toInt(p.second));
47 }
48 }
49 return map;
50 }
51
toStringVector(const py::list list)52 std::vector<std::string> toStringVector(const py::list list) {
53 std::vector<std::string> vector;
54 if (!list.empty()) {
55 for (auto l : list) {
56 if (l.is_none())
57 vector.emplace_back("");
58 else
59 vector.push_back(py::str(l));
60 }
61 }
62 return vector;
63 }
64
toIntVector(const py::list input_list)65 std::vector<pid_t> toIntVector(const py::list input_list) {
66 std::vector<pid_t> vector;
67 if (!input_list.empty()) {
68 std::transform(input_list.begin(), input_list.end(), std::back_inserter(vector),
69 [&](const py::handle &handle) { return static_cast<pid_t>(toInt(handle)); });
70 }
71 return vector;
72 }
73
toIntMap(const py::dict input_dict)74 std::unordered_map<int32_t, std::vector<pid_t>> toIntMap(const py::dict input_dict) {
75 std::unordered_map<int32_t, std::vector<pid_t>> map;
76 if (!input_dict.empty()) {
77 for (auto p : input_dict) {
78 (void)map.emplace(toInt(p.first), toIntVector(py::reinterpret_borrow<py::list>(p.second)));
79 }
80 }
81 return map;
82 }
83
toIntPair(const py::tuple tuple)84 std::pair<int64_t, int64_t> toIntPair(const py::tuple tuple) {
85 std::pair<int64_t, int64_t> pair;
86 if (tuple.size() == 2) {
87 pair = std::make_pair(toInt64((tuple)[0]), toInt64((tuple)[1]));
88 }
89 return pair;
90 }
91
toPairVector(const py::list list)92 std::vector<std::pair<int, int>> toPairVector(const py::list list) {
93 std::vector<std::pair<int, int>> vector;
94 if (list) {
95 for (auto data : list) {
96 auto l = data.cast<py::tuple>();
97 if (l[1].is_none())
98 vector.emplace_back(toInt64(l[0]), 0);
99 else
100 vector.emplace_back(toInt64(l[0]), toInt64(l[1]));
101 }
102 }
103 return vector;
104 }
105
toTensorOperations(py::list operations)106 std::vector<std::shared_ptr<TensorOperation>> toTensorOperations(py::list operations) {
107 std::vector<std::shared_ptr<TensorOperation>> vector;
108 if (!operations.empty()) {
109 for (auto op : operations) {
110 std::shared_ptr<TensorOp> tensor_op;
111 if (py::isinstance<TensorOp>(op)) {
112 tensor_op = op.cast<std::shared_ptr<TensorOp>>();
113 vector.push_back(std::make_shared<transforms::PreBuiltOperation>(tensor_op));
114 } else if (py::isinstance<py::function>(op)) {
115 tensor_op = std::make_shared<PyFuncOp>(op.cast<py::function>());
116 vector.push_back(std::make_shared<transforms::PreBuiltOperation>(tensor_op));
117 } else {
118 if (py::isinstance<TensorOperation>(op)) {
119 vector.push_back(op.cast<std::shared_ptr<TensorOperation>>());
120 } else {
121 THROW_IF_ERROR([]() {
122 RETURN_STATUS_UNEXPECTED(
123 "Error: tensor_op is not recognised (not TensorOp, TensorOperation and not pyfunc).");
124 }());
125 }
126 }
127 }
128 }
129 return vector;
130 }
131
toTensorOperation(py::handle operation)132 std::shared_ptr<TensorOperation> toTensorOperation(py::handle operation) {
133 std::shared_ptr<TensorOperation> op;
134 std::shared_ptr<TensorOp> tensor_op;
135 if (py::isinstance<TensorOperation>(operation)) {
136 op = operation.cast<std::shared_ptr<TensorOperation>>();
137 } else if (py::isinstance<TensorOp>(operation)) {
138 tensor_op = operation.cast<std::shared_ptr<TensorOp>>();
139 op = std::make_shared<transforms::PreBuiltOperation>(tensor_op);
140 } else {
141 THROW_IF_ERROR(
142 []() { RETURN_STATUS_UNEXPECTED("Error: input operation is not a tensor_op or TensorOperation."); }());
143 }
144 return op;
145 }
146
toDatasetNode(std::shared_ptr<DatasetNode> self,py::list datasets)147 std::vector<std::shared_ptr<DatasetNode>> toDatasetNode(std::shared_ptr<DatasetNode> self, py::list datasets) {
148 std::vector<std::shared_ptr<DatasetNode>> vector;
149 vector.push_back(self);
150 if (datasets) {
151 for (auto ds : *datasets) {
152 if (py::isinstance<DatasetNode>(ds)) {
153 vector.push_back(ds.cast<std::shared_ptr<DatasetNode>>());
154 } else {
155 THROW_IF_ERROR(
156 []() { RETURN_STATUS_UNEXPECTED("Error: datasets is not recognised (not a DatasetNode instance)."); }());
157 }
158 }
159 }
160 return vector;
161 }
162
toSamplerObj(const py::handle py_sampler,bool isMindDataset)163 std::shared_ptr<SamplerObj> toSamplerObj(const py::handle py_sampler, bool isMindDataset) {
164 if (py_sampler.is_none()) {
165 return nullptr;
166 }
167 if (py_sampler) {
168 std::shared_ptr<SamplerObj> sampler_obj;
169 if (!isMindDataset) {
170 auto parse = py::reinterpret_borrow<py::object>(py_sampler).attr("parse");
171 sampler_obj = parse().cast<std::shared_ptr<SamplerObj>>();
172 } else {
173 // Mindrecord Sampler
174 std::shared_ptr<mindrecord::ShardOperator> sampler;
175 auto parse = py::reinterpret_borrow<py::object>(py_sampler).attr("parse_for_minddataset");
176 sampler = parse().cast<std::shared_ptr<mindrecord::ShardOperator>>();
177 sampler_obj = std::make_shared<PreBuiltSamplerObj>(std::move(sampler));
178 }
179 return sampler_obj;
180 } else {
181 THROW_IF_ERROR([]() { RETURN_STATUS_UNEXPECTED("Error: sampler input is not SamplerRT."); }());
182 }
183 return nullptr;
184 }
185
186 // Here we take in a python object, that holds a reference to a C++ object
toDatasetCache(std::shared_ptr<CacheClient> cc)187 std::shared_ptr<DatasetCache> toDatasetCache(std::shared_ptr<CacheClient> cc) {
188 if (cc) {
189 std::shared_ptr<DatasetCache> built_cache;
190 built_cache = std::make_shared<PreBuiltDatasetCache>(std::move(cc));
191 return built_cache;
192 } else {
193 // don't need to check here as cache is not enabled.
194 return nullptr;
195 }
196 }
197
toShuffleMode(const int32_t shuffle)198 ShuffleMode toShuffleMode(const int32_t shuffle) {
199 if (shuffle == 0) {
200 return ShuffleMode::kFalse;
201 }
202 if (shuffle == 1) {
203 return ShuffleMode::kFiles;
204 }
205 if (shuffle == 2) {
206 return ShuffleMode::kGlobal;
207 }
208 return ShuffleMode();
209 }
210
toCSVBase(py::list csv_bases)211 std::vector<std::shared_ptr<CsvBase>> toCSVBase(py::list csv_bases) {
212 std::vector<std::shared_ptr<CsvBase>> vector;
213 if (csv_bases) {
214 for (auto base : *csv_bases) {
215 if (py::isinstance<py::int_>(base)) {
216 vector.push_back(std::make_shared<CsvRecord<int>>(CsvType::INT, toInt(base)));
217 } else if (py::isinstance<py::float_>(base)) {
218 vector.push_back(std::make_shared<CsvRecord<float>>(CsvType::FLOAT, toFloat(base)));
219 } else if (py::isinstance<py::str>(base)) {
220 vector.push_back(std::make_shared<CsvRecord<std::string>>(CsvType::STRING, toString(base)));
221 } else {
222 THROW_IF_ERROR([]() { RETURN_STATUS_UNEXPECTED("Error: each default value must be int, float, or string"); }());
223 }
224 }
225 }
226 return vector;
227 }
228
ToJson(const py::handle & padded_sample,nlohmann::json * const padded_sample_json,std::map<std::string,std::string> * sample_bytes)229 Status ToJson(const py::handle &padded_sample, nlohmann::json *const padded_sample_json,
230 std::map<std::string, std::string> *sample_bytes) {
231 for (const py::handle &key : padded_sample) {
232 if (py::isinstance<py::bytes>(padded_sample[key])) {
233 (*sample_bytes)[py::str(key).cast<std::string>()] = padded_sample[key].cast<std::string>();
234 // py::str(key) enter here will loss its key name, so we create an unuse key for it in json, to pass ValidateParam
235 (*padded_sample_json)[py::str(key).cast<std::string>()] = nlohmann::json::object();
236 } else {
237 nlohmann::json obj_json;
238 if (padded_sample[key].is_none()) {
239 obj_json = nullptr;
240 } else if (py::isinstance<py::int_>(padded_sample[key])) {
241 obj_json = padded_sample[key].cast<int64_t>();
242 } else if (py::isinstance<py::float_>(padded_sample[key])) {
243 obj_json = padded_sample[key].cast<double>();
244 } else if (py::isinstance<py::str>(padded_sample[key])) {
245 obj_json = padded_sample[key].cast<std::string>(); // also catch py::bytes
246 } else {
247 MS_LOG(ERROR) << "Python object convert to json failed: " << py::cast<std::string>(padded_sample[key]);
248 RETURN_STATUS_SYNTAX_ERROR("Python object convert to json failed");
249 }
250 (*padded_sample_json)[py::str(key).cast<std::string>()] = obj_json;
251 }
252 }
253 return Status::OK();
254 }
255
toPadInfo(py::dict value,std::map<std::string,std::pair<TensorShape,std::shared_ptr<Tensor>>> * pad_info)256 Status toPadInfo(py::dict value, std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> *pad_info) {
257 constexpr size_t kExpectedTupleSize = 2;
258 for (auto p : value) {
259 if (!p.second.is_none()) {
260 auto tp = py::reinterpret_borrow<py::tuple>(p.second);
261 CHECK_FAIL_RETURN_UNEXPECTED(tp.size() == kExpectedTupleSize,
262 "tuple in pad_info must be (list,int) or (list,float)");
263 TensorShape shape = tp[0].is_none() ? TensorShape::CreateUnknownRankShape() : TensorShape(tp[0]);
264 std::shared_ptr<Tensor> pad_val = nullptr;
265 if (py::isinstance<py::str>(tp[1])) {
266 std::string pad_val_string = tp[1].is_none() ? "" : toString(tp[1]);
267 CHECK_FAIL_RETURN_UNEXPECTED(
268 Tensor::CreateFromVector(std::vector<std::string>{pad_val_string}, TensorShape::CreateScalar(), &pad_val),
269 "Cannot create pad_value Tensor");
270 } else {
271 float pad_val_float = tp[1].is_none() ? 0 : toFloat(tp[1]);
272 CHECK_FAIL_RETURN_UNEXPECTED(
273 Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_val),
274 "Cannot create pad_value Tensor");
275 RETURN_IF_NOT_OK(pad_val->SetItemAt<float>({}, pad_val_float));
276 }
277 (void)pad_info->insert({toString(p.first), {shape, pad_val}});
278 } else { // tuple is None
279 (void)pad_info->insert({toString(p.first), {TensorShape({}), nullptr}});
280 }
281 }
282 return Status::OK();
283 }
284
toPyFuncOp(py::object func,DataType::Type data_type)285 std::shared_ptr<TensorOp> toPyFuncOp(py::object func, DataType::Type data_type) {
286 std::shared_ptr<TensorOp> py_func;
287 if (!func.is_none()) {
288 py::function py_function = func.cast<py::function>();
289 py_func = std::make_shared<PyFuncOp>(py_function, data_type);
290 } else {
291 py_func = nullptr;
292 }
293 return py_func;
294 }
295
shapesToListOfShape(std::vector<TensorShape> shapes)296 py::list shapesToListOfShape(std::vector<TensorShape> shapes) {
297 py::list shape_list;
298 for (const auto &shape : shapes) {
299 shape_list.append(shape.AsVector());
300 }
301 return shape_list;
302 }
303
typesToListOfType(std::vector<DataType> types)304 py::list typesToListOfType(std::vector<DataType> types) {
305 py::list type_list;
306 for (const auto &type : types) {
307 type_list.append(type.AsNumpyType());
308 }
309 return type_list;
310 }
311 } // namespace dataset
312 } // namespace mindspore
313