• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/api/python/pybind_conversion.h"
17 
18 namespace mindspore {
19 namespace dataset {
toFloat(const py::handle & handle)20 float toFloat(const py::handle &handle) { return py::reinterpret_borrow<py::float_>(handle); }
21 
toInt(const py::handle & handle)22 int toInt(const py::handle &handle) { return py::reinterpret_borrow<py::int_>(handle); }
23 
toInt64(const py::handle & handle)24 int64_t toInt64(const py::handle &handle) { return py::reinterpret_borrow<py::int_>(handle); }
25 
toBool(const py::handle & handle)26 bool toBool(const py::handle &handle) { return py::reinterpret_borrow<py::bool_>(handle); }
27 
toString(const py::handle & handle)28 std::string toString(const py::handle &handle) { return py::reinterpret_borrow<py::str>(handle); }
29 
toStringSet(const py::list list)30 std::set<std::string> toStringSet(const py::list list) {
31   std::set<std::string> set;
32   if (!list.empty()) {
33     for (auto l : list) {
34       if (!l.is_none()) {
35         (void)set.insert(py::str(l));
36       }
37     }
38   }
39   return set;
40 }
41 
toStringMap(const py::dict dict)42 std::map<std::string, int32_t> toStringMap(const py::dict dict) {
43   std::map<std::string, int32_t> map;
44   if (!dict.empty()) {
45     for (auto p : dict) {
46       (void)map.emplace(toString(p.first), toInt(p.second));
47     }
48   }
49   return map;
50 }
51 
toStringVector(const py::list list)52 std::vector<std::string> toStringVector(const py::list list) {
53   std::vector<std::string> vector;
54   if (!list.empty()) {
55     for (auto l : list) {
56       if (l.is_none())
57         vector.emplace_back("");
58       else
59         vector.push_back(py::str(l));
60     }
61   }
62   return vector;
63 }
64 
toIntVector(const py::list input_list)65 std::vector<pid_t> toIntVector(const py::list input_list) {
66   std::vector<pid_t> vector;
67   if (!input_list.empty()) {
68     std::transform(input_list.begin(), input_list.end(), std::back_inserter(vector),
69                    [&](const py::handle &handle) { return static_cast<pid_t>(toInt(handle)); });
70   }
71   return vector;
72 }
73 
toIntMap(const py::dict input_dict)74 std::unordered_map<int32_t, std::vector<pid_t>> toIntMap(const py::dict input_dict) {
75   std::unordered_map<int32_t, std::vector<pid_t>> map;
76   if (!input_dict.empty()) {
77     for (auto p : input_dict) {
78       (void)map.emplace(toInt(p.first), toIntVector(py::reinterpret_borrow<py::list>(p.second)));
79     }
80   }
81   return map;
82 }
83 
toIntPair(const py::tuple tuple)84 std::pair<int64_t, int64_t> toIntPair(const py::tuple tuple) {
85   std::pair<int64_t, int64_t> pair;
86   if (tuple.size() == 2) {
87     pair = std::make_pair(toInt64((tuple)[0]), toInt64((tuple)[1]));
88   }
89   return pair;
90 }
91 
toPairVector(const py::list list)92 std::vector<std::pair<int, int>> toPairVector(const py::list list) {
93   std::vector<std::pair<int, int>> vector;
94   if (list) {
95     for (auto data : list) {
96       auto l = data.cast<py::tuple>();
97       if (l[1].is_none())
98         vector.emplace_back(toInt64(l[0]), 0);
99       else
100         vector.emplace_back(toInt64(l[0]), toInt64(l[1]));
101     }
102   }
103   return vector;
104 }
105 
toTensorOperations(py::list operations)106 std::vector<std::shared_ptr<TensorOperation>> toTensorOperations(py::list operations) {
107   std::vector<std::shared_ptr<TensorOperation>> vector;
108   if (!operations.empty()) {
109     for (auto op : operations) {
110       std::shared_ptr<TensorOp> tensor_op;
111       if (py::isinstance<TensorOp>(op)) {
112         tensor_op = op.cast<std::shared_ptr<TensorOp>>();
113         vector.push_back(std::make_shared<transforms::PreBuiltOperation>(tensor_op));
114       } else if (py::isinstance<py::function>(op)) {
115         tensor_op = std::make_shared<PyFuncOp>(op.cast<py::function>());
116         vector.push_back(std::make_shared<transforms::PreBuiltOperation>(tensor_op));
117       } else {
118         if (py::isinstance<TensorOperation>(op)) {
119           vector.push_back(op.cast<std::shared_ptr<TensorOperation>>());
120         } else {
121           THROW_IF_ERROR([]() {
122             RETURN_STATUS_UNEXPECTED(
123               "Error: tensor_op is not recognised (not TensorOp, TensorOperation and not pyfunc).");
124           }());
125         }
126       }
127     }
128   }
129   return vector;
130 }
131 
toTensorOperation(py::handle operation)132 std::shared_ptr<TensorOperation> toTensorOperation(py::handle operation) {
133   std::shared_ptr<TensorOperation> op;
134   std::shared_ptr<TensorOp> tensor_op;
135   if (py::isinstance<TensorOperation>(operation)) {
136     op = operation.cast<std::shared_ptr<TensorOperation>>();
137   } else if (py::isinstance<TensorOp>(operation)) {
138     tensor_op = operation.cast<std::shared_ptr<TensorOp>>();
139     op = std::make_shared<transforms::PreBuiltOperation>(tensor_op);
140   } else {
141     THROW_IF_ERROR(
142       []() { RETURN_STATUS_UNEXPECTED("Error: input operation is not a tensor_op or TensorOperation."); }());
143   }
144   return op;
145 }
146 
toDatasetNode(std::shared_ptr<DatasetNode> self,py::list datasets)147 std::vector<std::shared_ptr<DatasetNode>> toDatasetNode(std::shared_ptr<DatasetNode> self, py::list datasets) {
148   std::vector<std::shared_ptr<DatasetNode>> vector;
149   vector.push_back(self);
150   if (datasets) {
151     for (auto ds : *datasets) {
152       if (py::isinstance<DatasetNode>(ds)) {
153         vector.push_back(ds.cast<std::shared_ptr<DatasetNode>>());
154       } else {
155         THROW_IF_ERROR(
156           []() { RETURN_STATUS_UNEXPECTED("Error: datasets is not recognised (not a DatasetNode instance)."); }());
157       }
158     }
159   }
160   return vector;
161 }
162 
toSamplerObj(const py::handle py_sampler,bool isMindDataset)163 std::shared_ptr<SamplerObj> toSamplerObj(const py::handle py_sampler, bool isMindDataset) {
164   if (py_sampler.is_none()) {
165     return nullptr;
166   }
167   if (py_sampler) {
168     std::shared_ptr<SamplerObj> sampler_obj;
169     if (!isMindDataset) {
170       auto parse = py::reinterpret_borrow<py::object>(py_sampler).attr("parse");
171       sampler_obj = parse().cast<std::shared_ptr<SamplerObj>>();
172     } else {
173       // Mindrecord Sampler
174       std::shared_ptr<mindrecord::ShardOperator> sampler;
175       auto parse = py::reinterpret_borrow<py::object>(py_sampler).attr("parse_for_minddataset");
176       sampler = parse().cast<std::shared_ptr<mindrecord::ShardOperator>>();
177       sampler_obj = std::make_shared<PreBuiltSamplerObj>(std::move(sampler));
178     }
179     return sampler_obj;
180   } else {
181     THROW_IF_ERROR([]() { RETURN_STATUS_UNEXPECTED("Error: sampler input is not SamplerRT."); }());
182   }
183   return nullptr;
184 }
185 
186 // Here we take in a python object, that holds a reference to a C++ object
toDatasetCache(std::shared_ptr<CacheClient> cc)187 std::shared_ptr<DatasetCache> toDatasetCache(std::shared_ptr<CacheClient> cc) {
188   if (cc) {
189     std::shared_ptr<DatasetCache> built_cache;
190     built_cache = std::make_shared<PreBuiltDatasetCache>(std::move(cc));
191     return built_cache;
192   } else {
193     // don't need to check here as cache is not enabled.
194     return nullptr;
195   }
196 }
197 
toShuffleMode(const int32_t shuffle)198 ShuffleMode toShuffleMode(const int32_t shuffle) {
199   if (shuffle == 0) {
200     return ShuffleMode::kFalse;
201   }
202   if (shuffle == 1) {
203     return ShuffleMode::kFiles;
204   }
205   if (shuffle == 2) {
206     return ShuffleMode::kGlobal;
207   }
208   return ShuffleMode();
209 }
210 
toCSVBase(py::list csv_bases)211 std::vector<std::shared_ptr<CsvBase>> toCSVBase(py::list csv_bases) {
212   std::vector<std::shared_ptr<CsvBase>> vector;
213   if (csv_bases) {
214     for (auto base : *csv_bases) {
215       if (py::isinstance<py::int_>(base)) {
216         vector.push_back(std::make_shared<CsvRecord<int>>(CsvType::INT, toInt(base)));
217       } else if (py::isinstance<py::float_>(base)) {
218         vector.push_back(std::make_shared<CsvRecord<float>>(CsvType::FLOAT, toFloat(base)));
219       } else if (py::isinstance<py::str>(base)) {
220         vector.push_back(std::make_shared<CsvRecord<std::string>>(CsvType::STRING, toString(base)));
221       } else {
222         THROW_IF_ERROR([]() { RETURN_STATUS_UNEXPECTED("Error: each default value must be int, float, or string"); }());
223       }
224     }
225   }
226   return vector;
227 }
228 
ToJson(const py::handle & padded_sample,nlohmann::json * const padded_sample_json,std::map<std::string,std::string> * sample_bytes)229 Status ToJson(const py::handle &padded_sample, nlohmann::json *const padded_sample_json,
230               std::map<std::string, std::string> *sample_bytes) {
231   for (const py::handle &key : padded_sample) {
232     if (py::isinstance<py::bytes>(padded_sample[key])) {
233       (*sample_bytes)[py::str(key).cast<std::string>()] = padded_sample[key].cast<std::string>();
234       // py::str(key) enter here will loss its key name, so we create an unuse key for it in json, to pass ValidateParam
235       (*padded_sample_json)[py::str(key).cast<std::string>()] = nlohmann::json::object();
236     } else {
237       nlohmann::json obj_json;
238       if (padded_sample[key].is_none()) {
239         obj_json = nullptr;
240       } else if (py::isinstance<py::int_>(padded_sample[key])) {
241         obj_json = padded_sample[key].cast<int64_t>();
242       } else if (py::isinstance<py::float_>(padded_sample[key])) {
243         obj_json = padded_sample[key].cast<double>();
244       } else if (py::isinstance<py::str>(padded_sample[key])) {
245         obj_json = padded_sample[key].cast<std::string>();  // also catch py::bytes
246       } else {
247         MS_LOG(ERROR) << "Python object convert to json failed: " << py::cast<std::string>(padded_sample[key]);
248         RETURN_STATUS_SYNTAX_ERROR("Python object convert to json failed");
249       }
250       (*padded_sample_json)[py::str(key).cast<std::string>()] = obj_json;
251     }
252   }
253   return Status::OK();
254 }
255 
toPadInfo(py::dict value,std::map<std::string,std::pair<TensorShape,std::shared_ptr<Tensor>>> * pad_info)256 Status toPadInfo(py::dict value, std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> *pad_info) {
257   constexpr size_t kExpectedTupleSize = 2;
258   for (auto p : value) {
259     if (!p.second.is_none()) {
260       auto tp = py::reinterpret_borrow<py::tuple>(p.second);
261       CHECK_FAIL_RETURN_UNEXPECTED(tp.size() == kExpectedTupleSize,
262                                    "tuple in pad_info must be (list,int) or (list,float)");
263       TensorShape shape = tp[0].is_none() ? TensorShape::CreateUnknownRankShape() : TensorShape(tp[0]);
264       std::shared_ptr<Tensor> pad_val = nullptr;
265       if (py::isinstance<py::str>(tp[1])) {
266         std::string pad_val_string = tp[1].is_none() ? "" : toString(tp[1]);
267         CHECK_FAIL_RETURN_UNEXPECTED(
268           Tensor::CreateFromVector(std::vector<std::string>{pad_val_string}, TensorShape::CreateScalar(), &pad_val),
269           "Cannot create pad_value Tensor");
270       } else {
271         float pad_val_float = tp[1].is_none() ? 0 : toFloat(tp[1]);
272         CHECK_FAIL_RETURN_UNEXPECTED(
273           Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_val),
274           "Cannot create pad_value Tensor");
275         RETURN_IF_NOT_OK(pad_val->SetItemAt<float>({}, pad_val_float));
276       }
277       (void)pad_info->insert({toString(p.first), {shape, pad_val}});
278     } else {  // tuple is None
279       (void)pad_info->insert({toString(p.first), {TensorShape({}), nullptr}});
280     }
281   }
282   return Status::OK();
283 }
284 
toPyFuncOp(py::object func,DataType::Type data_type)285 std::shared_ptr<TensorOp> toPyFuncOp(py::object func, DataType::Type data_type) {
286   std::shared_ptr<TensorOp> py_func;
287   if (!func.is_none()) {
288     py::function py_function = func.cast<py::function>();
289     py_func = std::make_shared<PyFuncOp>(py_function, data_type);
290   } else {
291     py_func = nullptr;
292   }
293   return py_func;
294 }
295 
shapesToListOfShape(std::vector<TensorShape> shapes)296 py::list shapesToListOfShape(std::vector<TensorShape> shapes) {
297   py::list shape_list;
298   for (const auto &shape : shapes) {
299     shape_list.append(shape.AsVector());
300   }
301   return shape_list;
302 }
303 
typesToListOfType(std::vector<DataType> types)304 py::list typesToListOfType(std::vector<DataType> types) {
305   py::list type_list;
306   for (const auto &type : types) {
307     type_list.append(type.AsNumpyType());
308   }
309   return type_list;
310 }
311 }  // namespace dataset
312 }  // namespace mindspore
313