• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/api/python/pybind_conversion.h"
17 
18 namespace mindspore {
19 namespace dataset {
toFloat(const py::handle & handle)20 float toFloat(const py::handle &handle) { return py::reinterpret_borrow<py::float_>(handle); }
21 
toInt(const py::handle & handle)22 int toInt(const py::handle &handle) { return py::reinterpret_borrow<py::int_>(handle); }
23 
toInt64(const py::handle & handle)24 int64_t toInt64(const py::handle &handle) { return py::reinterpret_borrow<py::int_>(handle); }
25 
toBool(const py::handle & handle)26 bool toBool(const py::handle &handle) { return py::reinterpret_borrow<py::bool_>(handle); }
27 
toString(const py::handle & handle)28 std::string toString(const py::handle &handle) { return py::reinterpret_borrow<py::str>(handle); }
29 
toStringSet(const py::list list)30 std::set<std::string> toStringSet(const py::list list) {
31   std::set<std::string> set;
32   if (!list.empty()) {
33     for (auto l : list) {
34       if (!l.is_none()) {
35         (void)set.insert(py::str(l));
36       }
37     }
38   }
39   return set;
40 }
41 
toStringMap(const py::dict dict)42 std::map<std::string, int32_t> toStringMap(const py::dict dict) {
43   std::map<std::string, int32_t> map;
44   if (!dict.empty()) {
45     for (auto p : dict) {
46       (void)map.emplace(toString(p.first), toInt(p.second));
47     }
48   }
49   return map;
50 }
51 
toStringFloatMap(const py::dict dict)52 std::map<std::string, float> toStringFloatMap(const py::dict dict) {
53   std::map<std::string, float> map;
54   if (!dict.empty()) {
55     for (auto p : dict) {
56       (void)map.emplace(toString(p.first), toFloat(p.second));
57     }
58   }
59   return map;
60 }
61 
toStringVector(const py::list list)62 std::vector<std::string> toStringVector(const py::list list) {
63   std::vector<std::string> vector;
64   if (!list.empty()) {
65     for (auto l : list) {
66       if (l.is_none()) {
67         vector.emplace_back("");
68       } else {
69         vector.push_back(py::str(l));
70       }
71     }
72   }
73   return vector;
74 }
75 
toIntVector(const py::list input_list)76 std::vector<pid_t> toIntVector(const py::list input_list) {
77   std::vector<pid_t> vector;
78   if (!input_list.empty()) {
79     std::transform(input_list.begin(), input_list.end(), std::back_inserter(vector),
80                    [&](const py::handle &handle) { return static_cast<pid_t>(toInt(handle)); });
81   }
82   return vector;
83 }
84 
toInt64Vector(const py::list input_list)85 std::vector<int64_t> toInt64Vector(const py::list input_list) {
86   std::vector<int64_t> vector;
87   if (!input_list.empty()) {
88     std::transform(input_list.begin(), input_list.end(), std::back_inserter(vector),
89                    [&](const py::handle &handle) { return static_cast<int64_t>(toInt64(handle)); });
90   }
91   return vector;
92 }
93 
toIntMap(const py::dict input_dict)94 std::unordered_map<int32_t, std::vector<pid_t>> toIntMap(const py::dict input_dict) {
95   std::unordered_map<int32_t, std::vector<pid_t>> map;
96   if (!input_dict.empty()) {
97     for (auto p : input_dict) {
98       (void)map.emplace(toInt(p.first), toIntVector(py::reinterpret_borrow<py::list>(p.second)));
99     }
100   }
101   return map;
102 }
103 
toIntPair(const py::tuple tuple)104 std::pair<int64_t, int64_t> toIntPair(const py::tuple tuple) {
105   std::pair<int64_t, int64_t> pair;
106   if (tuple.size() == 2) {
107     pair = std::make_pair(toInt64((tuple)[0]), toInt64((tuple)[1]));
108   }
109   return pair;
110 }
111 
toPairVector(const py::list list)112 std::vector<std::pair<int, int>> toPairVector(const py::list list) {
113   std::vector<std::pair<int, int>> vector;
114   if (list) {
115     for (auto data : list) {
116       auto l = data.cast<py::tuple>();
117       if (l[1].is_none()) {
118         vector.emplace_back(toInt64(l[0]), 0);
119       } else {
120         vector.emplace_back(toInt64(l[0]), toInt64(l[1]));
121       }
122     }
123   }
124   return vector;
125 }
126 
toTensorOperations(py::list operations)127 std::vector<std::shared_ptr<TensorOperation>> toTensorOperations(py::list operations) {
128   std::vector<std::shared_ptr<TensorOperation>> vector;
129   if (!operations.empty()) {
130     for (auto op : operations) {
131       std::shared_ptr<TensorOp> tensor_op;
132       if (py::isinstance<TensorOp>(op)) {
133         tensor_op = op.cast<std::shared_ptr<TensorOp>>();
134         vector.push_back(std::make_shared<transforms::PreBuiltOperation>(tensor_op));
135       } else if (py::isinstance<py::function>(op)) {
136         tensor_op = std::make_shared<PyFuncOp>(op.cast<py::function>());
137         vector.push_back(std::make_shared<transforms::PreBuiltOperation>(tensor_op));
138       } else {
139         if (py::isinstance<TensorOperation>(op)) {
140           vector.push_back(op.cast<std::shared_ptr<TensorOperation>>());
141         } else {
142           THROW_IF_ERROR([]() {
143             RETURN_STATUS_UNEXPECTED(
144               "Error: tensor_op is not recognised (not TensorOp, TensorOperation and not pyfunc).");
145           }());
146         }
147       }
148     }
149   }
150   return vector;
151 }
152 
toTensorOperation(py::handle operation)153 std::shared_ptr<TensorOperation> toTensorOperation(py::handle operation) {
154   std::shared_ptr<TensorOperation> op;
155   std::shared_ptr<TensorOp> tensor_op;
156   if (py::isinstance<TensorOperation>(operation)) {
157     op = operation.cast<std::shared_ptr<TensorOperation>>();
158   } else if (py::isinstance<TensorOp>(operation)) {
159     tensor_op = operation.cast<std::shared_ptr<TensorOp>>();
160     op = std::make_shared<transforms::PreBuiltOperation>(tensor_op);
161   } else {
162     THROW_IF_ERROR(
163       []() { RETURN_STATUS_UNEXPECTED("Error: input operation is not a tensor_op or TensorOperation."); }());
164   }
165   return op;
166 }
167 
toDatasetNode(std::shared_ptr<DatasetNode> self,py::list datasets)168 std::vector<std::shared_ptr<DatasetNode>> toDatasetNode(std::shared_ptr<DatasetNode> self, py::list datasets) {
169   std::vector<std::shared_ptr<DatasetNode>> vector;
170   vector.push_back(self);
171   if (datasets) {
172     for (auto ds : *datasets) {
173       if (py::isinstance<DatasetNode>(ds)) {
174         vector.push_back(ds.cast<std::shared_ptr<DatasetNode>>());
175       } else {
176         THROW_IF_ERROR(
177           []() { RETURN_STATUS_UNEXPECTED("Error: datasets is not recognised (not a DatasetNode instance)."); }());
178       }
179     }
180   }
181   return vector;
182 }
183 
toSamplerObj(const py::handle py_sampler,bool isMindDataset)184 std::shared_ptr<SamplerObj> toSamplerObj(const py::handle py_sampler, bool isMindDataset) {
185   if (py_sampler.is_none()) {
186     return nullptr;
187   }
188   if (py_sampler) {
189     std::shared_ptr<SamplerObj> sampler_obj;
190     if (!isMindDataset) {
191       auto parse = py::reinterpret_borrow<py::object>(py_sampler).attr("parse");
192       sampler_obj = parse().cast<std::shared_ptr<SamplerObj>>();
193     } else {
194       // Mindrecord Sampler
195       std::shared_ptr<mindrecord::ShardOperator> sampler;
196       auto parse = py::reinterpret_borrow<py::object>(py_sampler).attr("parse_for_minddataset");
197       sampler = parse().cast<std::shared_ptr<mindrecord::ShardOperator>>();
198       sampler_obj = std::make_shared<PreBuiltSamplerObj>(std::move(sampler));
199     }
200     return sampler_obj;
201   } else {
202     THROW_IF_ERROR([]() { RETURN_STATUS_UNEXPECTED("Error: sampler input is not SamplerRT."); }());
203   }
204   return nullptr;
205 }
206 
207 // Here we take in a python object, that holds a reference to a C++ object
toDatasetCache(std::shared_ptr<CacheClient> cc)208 std::shared_ptr<DatasetCache> toDatasetCache(std::shared_ptr<CacheClient> cc) {
209   if (cc) {
210     std::shared_ptr<DatasetCache> built_cache;
211     built_cache = std::make_shared<PreBuiltDatasetCache>(std::move(cc));
212     return built_cache;
213   } else {
214     // don't need to check here as cache is not enabled.
215     return nullptr;
216   }
217 }
218 
toShuffleMode(const int32_t shuffle)219 ShuffleMode toShuffleMode(const int32_t shuffle) {
220   if (shuffle == 0) {
221     return ShuffleMode::kFalse;
222   }
223   if (shuffle == 1) {
224     return ShuffleMode::kFiles;
225   }
226   if (shuffle == 2) {
227     return ShuffleMode::kGlobal;
228   }
229   return ShuffleMode();
230 }
231 
toCSVBase(py::list csv_bases)232 std::vector<std::shared_ptr<CsvBase>> toCSVBase(py::list csv_bases) {
233   std::vector<std::shared_ptr<CsvBase>> vector;
234   if (csv_bases) {
235     for (auto base : *csv_bases) {
236       if (py::isinstance<py::int_>(base)) {
237         vector.push_back(std::make_shared<CsvRecord<int>>(CsvType::INT, toInt(base)));
238       } else if (py::isinstance<py::float_>(base)) {
239         vector.push_back(std::make_shared<CsvRecord<float>>(CsvType::FLOAT, toFloat(base)));
240       } else if (py::isinstance<py::str>(base)) {
241         vector.push_back(std::make_shared<CsvRecord<std::string>>(CsvType::STRING, toString(base)));
242       } else {
243         THROW_IF_ERROR([]() { RETURN_STATUS_UNEXPECTED("Error: each default value must be int, float, or string"); }());
244       }
245     }
246   }
247   return vector;
248 }
249 
ToJson(const py::handle & padded_sample,nlohmann::json * const padded_sample_json,std::map<std::string,std::string> * sample_bytes)250 Status ToJson(const py::handle &padded_sample, nlohmann::json *const padded_sample_json,
251               std::map<std::string, std::string> *sample_bytes) {
252   RETURN_UNEXPECTED_IF_NULL(padded_sample_json);
253   for (const py::handle &key : padded_sample) {
254     if (py::isinstance<py::bytes>(padded_sample[key])) {
255       (*sample_bytes)[py::str(key).cast<std::string>()] = padded_sample[key].cast<std::string>();
256       // py::str(key) enter here will loss its key name, so we create an unuse key for it in json, to pass ValidateParam
257       (*padded_sample_json)[py::str(key).cast<std::string>()] = nlohmann::json::object();
258     } else {
259       nlohmann::json obj_json;
260       if (padded_sample[key].is_none()) {
261         obj_json = nullptr;
262       } else if (py::isinstance<py::int_>(padded_sample[key])) {
263         obj_json = padded_sample[key].cast<int64_t>();
264       } else if (py::isinstance<py::float_>(padded_sample[key])) {
265         obj_json = padded_sample[key].cast<double>();
266       } else if (py::isinstance<py::str>(padded_sample[key])) {
267         obj_json = padded_sample[key].cast<std::string>();  // also catch py::bytes
268       } else {
269         std::string err_msg = "Python object convert to json failed: " + py::cast<std::string>(padded_sample[key]);
270         LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
271       }
272       (*padded_sample_json)[py::str(key).cast<std::string>()] = obj_json;
273     }
274   }
275   return Status::OK();
276 }
277 
toPadInfo(const py::dict & value,std::map<std::string,std::pair<TensorShape,std::shared_ptr<Tensor>>> * pad_info)278 Status toPadInfo(const py::dict &value,
279                  std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> *pad_info) {
280   RETURN_UNEXPECTED_IF_NULL(pad_info);
281   constexpr size_t kExpectedTupleSize = 2;
282   for (auto p : value) {
283     if (!p.second.is_none()) {
284       auto tp = py::reinterpret_borrow<py::tuple>(p.second);
285       CHECK_FAIL_RETURN_UNEXPECTED(tp.size() == kExpectedTupleSize,
286                                    "tuple in pad_info must be (list,int) or (list,float)");
287       TensorShape shape = tp[0].is_none() ? TensorShape::CreateUnknownRankShape() : TensorShape(tp[0]);
288       std::shared_ptr<Tensor> pad_val = nullptr;
289       // Do not change the order of py::bytes and py::str. Because py::bytes is also an instance of py::str.
290       if (py::isinstance<py::bytes>(tp[1])) {
291         std::string pad_val_string = tp[1].is_none() ? "" : toString(tp[1]);
292         CHECK_FAIL_RETURN_UNEXPECTED(
293           Tensor::CreateFromVector(std::vector<std::string>{pad_val_string}, TensorShape::CreateScalar(),
294                                    DataType(DataType::DE_BYTES), &pad_val),
295           "Cannot create pad_value Tensor");
296       } else if (py::isinstance<py::str>(tp[1])) {
297         std::string pad_val_string = tp[1].is_none() ? "" : toString(tp[1]);
298         CHECK_FAIL_RETURN_UNEXPECTED(
299           Tensor::CreateFromVector(std::vector<std::string>{pad_val_string}, TensorShape::CreateScalar(),
300                                    DataType(DataType::DE_STRING), &pad_val),
301           "Cannot create pad_value Tensor");
302       } else {
303         float pad_val_float = tp[1].is_none() ? 0 : toFloat(tp[1]);
304         CHECK_FAIL_RETURN_UNEXPECTED(
305           Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_val),
306           "Cannot create pad_value Tensor");
307         RETURN_IF_NOT_OK(pad_val->SetItemAt<float>({}, pad_val_float));
308       }
309       (void)pad_info->insert({toString(p.first), {shape, pad_val}});
310     } else {  // tuple is None
311       (void)pad_info->insert({toString(p.first), {TensorShape({}), nullptr}});
312     }
313   }
314   return Status::OK();
315 }
316 
toPyFuncOp(py::object func,DataType::Type data_type)317 std::shared_ptr<TensorOp> toPyFuncOp(py::object func, DataType::Type data_type) {
318   std::shared_ptr<TensorOp> py_func;
319   if (!func.is_none()) {
320     py::function py_function = func.cast<py::function>();
321     py_func = std::make_shared<PyFuncOp>(py_function, data_type);
322   } else {
323     py_func = nullptr;
324   }
325   return py_func;
326 }
327 
shapesToListOfShape(std::vector<TensorShape> shapes)328 py::list shapesToListOfShape(std::vector<TensorShape> shapes) {
329   py::list shape_list;
330   for (const auto &shape : shapes) {
331     py::list per_col_shape;
332     for (auto &elem : shape.AsVector()) {
333       if (elem == -1) {
334         per_col_shape.append(py::none());
335       } else {
336         per_col_shape.append(elem);
337       }
338     }
339     shape_list.append(per_col_shape);
340   }
341   return shape_list;
342 }
343 
typesToListOfType(std::vector<DataType> types)344 py::list typesToListOfType(std::vector<DataType> types) {
345   py::list type_list;
346   for (const auto &type : types) {
347     type_list.append(type.AsNumpyType());
348   }
349   return type_list;
350 }
351 
toIntMapTensor(py::dict value,std::unordered_map<std::int16_t,std::shared_ptr<Tensor>> * feature)352 Status toIntMapTensor(py::dict value, std::unordered_map<std::int16_t, std::shared_ptr<Tensor>> *feature) {
353   RETURN_UNEXPECTED_IF_NULL(feature);
354   for (const auto &p : value) {
355     // do some judge, as whether it is none
356     std::shared_ptr<Tensor> feat_tensor = nullptr;
357     RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(py::reinterpret_borrow<py::array>(p.second), &feat_tensor));
358     (void)feature->insert({toInt(p.first), feat_tensor});
359   }
360   return Status::OK();
361 }
362 }  // namespace dataset
363 }  // namespace mindspore
364