• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/engine/datasetops/dataset_op.h"
17 
18 #include <iomanip>
19 #include <iostream>
20 #include <memory>
21 #include <regex>
22 #include <utility>
23 #include <string>
24 #include <algorithm>
25 
26 #include "minddata/dataset/engine/datasetops/device_queue_op.h"
27 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
28 
29 #include "minddata/dataset/engine/db_connector.h"
30 #ifndef ENABLE_ANDROID
31 #include "utils/system/crc32c.h"
32 #include "utils/log_adapter.h"
33 #else
34 #include "mindspore/lite/src/common/log_adapter.h"
35 #endif
36 
37 namespace mindspore {
38 namespace dataset {
39 // Constructor
// Constructor.
// @param op_connector_size  Capacity of this op's output connector queue (0 means no connector).
// @param sampler            Optional sampler owned by this op; may be nullptr for non-source ops.
DatasetOp::DatasetOp(int32_t op_connector_size, std::shared_ptr<SamplerRT> sampler)
    : oc_queue_size_(op_connector_size),         // output connector queue capacity
      sampler_(sampler),                         // shared ownership of the sampler (may be null)
      operator_id_(kInvalidOperatorId),          // invalid until assigned to an execution tree
      tree_(nullptr),                            // not yet attached to any tree
      state_(OpState::kDeOpIdle),
      op_total_repeats_(kInfiniteRepeat),        // total repeats across the whole run
      op_num_repeats_per_epoch_(kInfiniteRepeat),
      op_current_repeats_(0),
      op_current_epochs_(0),
      out_connector_(nullptr),                   // created later in PrepareOperator()
      dataset_size_(-1),                         // -1 means "not computed yet"
      num_classes_(-1) {                         // -1 means "not defined"
  // The operator starts out with an invalid operator id.  The only way to
  // get it out of invalid state is to assign the operator to an execution tree.
}
56 
57 // Adds a operator to become our child.
AddChild(std::shared_ptr<DatasetOp> child)58 Status DatasetOp::AddChild(std::shared_ptr<DatasetOp> child) {
59   if (std::dynamic_pointer_cast<DeviceQueueOp>(child) != nullptr) {
60     std::string err_msg(
61       "Unsupported scenario, \'send\' operator can only be after \'device_queue\' operation, but got " + Name());
62     RETURN_STATUS_UNEXPECTED(err_msg);
63   }
64   if (operator_id_ == kInvalidOperatorId) {
65     std::string err_msg(
66       "Cannot add child node. Tree node connections can only "
67       "be made if the node belongs to a tree.");
68     RETURN_STATUS_UNEXPECTED(err_msg);
69   }
70 
71   // disallow relationships with other trees
72   if (tree_ != child->tree_) {
73     std::string err_msg(
74       "Cannot add child node. Tree node connections can only be made if both nodes belong to the same tree.");
75     RETURN_STATUS_UNEXPECTED(err_msg);
76   }
77   child_.push_back(child);
78   child->AddParent(this);
79   return Status::OK();
80 }
81 
RemoveChild(std::shared_ptr<DatasetOp> child)82 Status DatasetOp::RemoveChild(std::shared_ptr<DatasetOp> child) {
83   if (operator_id_ == kInvalidOperatorId) {
84     std::string err_msg(
85       "Cannot remove child node. Tree node connections can only "
86       "be made if the node belongs to a tree.");
87     RETURN_STATUS_UNEXPECTED(err_msg);
88   }
89 
90   // disallow relationships with other trees
91   if (tree_ != child->tree_) {
92     std::string err_msg(
93       "Cannot remove child node. Tree node connections can only be made if both nodes belong to the same tree.");
94     RETURN_STATUS_UNEXPECTED(err_msg);
95   }
96 
97   child_.erase(std::remove(child_.begin(), child_.end(), child), child_.end());
98   child->RemoveParent(this);
99   return Status::OK();
100 }
101 
// Inserts 'to_add' between this node and all of its current parents:
// every former parent of 'this' becomes a parent of 'to_add', and 'to_add'
// becomes the (sole new) parent of 'this'.  If 'this' was the tree root,
// 'to_add' becomes the new root.
// @param to_add  The operator to splice in above this node (must be non-null).
Status DatasetOp::InsertAsParent(std::shared_ptr<DatasetOp> to_add) {
  RETURN_UNEXPECTED_IF_NULL(to_add);
  // Re-point each former parent at the new node.  Note: RemoveChild mutates
  // this->parent_ via RemoveParent; iteration order here matches the original
  // intent of walking the pre-existing parents.
  for (auto &prev_parent : this->parent_) {
    RETURN_IF_NOT_OK(prev_parent->RemoveChild(shared_from_this()));
    RETURN_IF_NOT_OK(prev_parent->AddChild(to_add));
  }
  // Finally, hang this node under the newly inserted parent.
  RETURN_IF_NOT_OK(to_add->AddChild(shared_from_this()));
  // If we were the root, promote the inserted node to root.
  if (tree_->root()->id() == this->id()) {
    RETURN_IF_NOT_OK(tree_->AssignRoot(to_add));
  }
  return Status::OK();
}
114 // Removes child operator in this operator.
RemoveChildren()115 Status DatasetOp::RemoveChildren() {
116   for (const auto &child : child_) {
117     child->RemoveParent(this);
118   }
119   child_.clear();
120 
121   return Status::OK();
122 }
123 
124 // Adds a parent operator to this operator
AddParent(DatasetOp * parent)125 void DatasetOp::AddParent(DatasetOp *parent) { parent_.push_back(parent); }
126 
127 // Removes a parent operator from this operator
RemoveParent(const DatasetOp * parent)128 void DatasetOp::RemoveParent(const DatasetOp *parent) {
129   parent_.erase(std::remove(parent_.begin(), parent_.end(), parent), parent_.end());
130 }
131 
132 // Removes this node from the tree and connects it's parent/child together
// Removes this node from the tree and connects its parent/child together.
// Only supports nodes in a one-to-one chain (at most one parent and one child);
// after the call this op is fully detached and its tree fields are invalidated.
Status DatasetOp::Remove() {
  // Only a linear (one-to-one) relationship can be spliced out safely.
  if (parent_.size() > 1) {
    std::string err_msg("[Internal ERROR], no support for the relationship between operators is not one-to-one.");
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  if (child_.size() > 1) {
    std::string err_msg("[Internal ERROR], no support for the relationship between operators is not one-to-one.");
    RETURN_STATUS_UNEXPECTED(err_msg);
  }

  // Scenario's when removing node B:
  // A -> B -> C
  // A -> B
  // B -> C
  //
  // If we remove B, then first take our child A and update it's parent to be C
  // It's possible the parent is null if we are the root node being removed.
  if (!child_.empty()) {
    // If we have a parent, then assign child's parent to point to our parent.
    if (!parent_.empty()) {
      // The splice below overwrites the child's single parent slot, so the
      // parent side must also be a one-child chain for this to be consistent.
      CHECK_FAIL_RETURN_UNEXPECTED(parent_[0]->Children().size() == 1,
                                   "Removing a node whose parent has more than 1 child is not supported.");
      child_[0]->parent_[0] = parent_[0];
    } else {
      // We don't have a parent, so we are the root node being removed.
      // clear the parent list of our child so that it becomes the new root.
      child_[0]->parent_.clear();
      RETURN_IF_NOT_OK(tree_->AssignRoot(child_[0]));
    }
  }

  // Next, if we had a parent, then set it's child to be our child.
  if (!parent_.empty()) {
    // if we have a child, then set our parent to point to it
    if (!child_.empty()) {
      parent_[0]->child_[0] = child_[0];
    } else {
      // We don't have a child, so clear the child list of the current
      // parent because it will be empty once we are removed.
      parent_[0]->child_.clear();
    }
  }

  // Finally, clear "this" op's parent and child pointers since we have just
  // disconnected it from the tree and invalidate it's fields.
  child_.clear();
  parent_.clear();
  operator_id_ = kInvalidOperatorId;  // back to the "not in a tree" state
  tree_ = nullptr;

  return Status::OK();
}
185 
186 // Getter function to get a shared pointer to our child
child(int32_t child_index) const187 std::shared_ptr<DatasetOp> DatasetOp::child(int32_t child_index) const {
188   std::shared_ptr<DatasetOp> return_op = nullptr;
189   if (child_.empty()) {
190     return return_op;
191   }
192   MS_ASSERT(child_index < static_cast<int>(child_.size()));
193   // Return a shared pointer
194   return child_[child_index];
195 }
196 
197 // Getter function to get the parent pointer
Parent(DatasetOp ** parent,int32_t parent_index) const198 void DatasetOp::Parent(DatasetOp **parent, int32_t parent_index) const {
199   if (parent_.empty()) {
200     // common case if this is a root node
201     *parent = nullptr;
202   } else {
203     MS_ASSERT(parent_index < static_cast<int>(parent_.size()));
204     *parent = parent_[parent_index];
205   }
206 }
207 
208 // Getter function to get all of our parents.
parents() const209 std::vector<DatasetOp *> DatasetOp::parents() const { return parent_; }
210 
211 // Creates the connector within this operator
CreateConnector(int32_t num_producers,int32_t num_consumers)212 void DatasetOp::CreateConnector(int32_t num_producers, int32_t num_consumers) {
213   MS_LOG(DEBUG) << "Creating connector in tree operator: " << operator_id_ << ". Producer: " << num_producers
214                 << ". Consumer: " << num_consumers << ".";
215   if (oc_queue_size_ > 0) {
216     out_connector_ = std::make_unique<DbConnector>(num_producers,  // The number of producers
217                                                    num_consumers,  // Only one consumer (the training App)
218                                                    oc_queue_size_);
219   } else {
220     // Some op's may choose not to have an output connector
221     MS_LOG(DEBUG) << "Bypassed connector creation for tree operator: " << operator_id_ << ".";
222     out_connector_ = nullptr;
223   }
224 }
225 
226 // A print method typically used for debugging.  showAll of true will recursively descend to child prints
// A print method typically used for debugging.  showAll of true will recursively descend to child prints.
// NOTE: the exact text emitted here matters — GenerateCRC() regex-filters this
// output to build a cache checksum, so field labels must stay stable.
void DatasetOp::Print(std::ostream &out, bool show_all) const {
  // When show_all is false, we display a 1 liner piece of text for the op.
  // When show_all is true, we display more detailed output for the op.
  // Derived printers should show their own header info, then call base class printer, followed by
  // derived-specific items.

  // Always show the id and name as first line regardless if this summary or detailed print
  out << "(" << std::setw(2) << operator_id_ << ") <" << Name() << ">:";

  if (show_all) {
    // The detailed display will show common base class info of the op.  Allow the derived class to print
    // it's own id and name though as the first line.
    out << "\nNumber of children     : " << child_.size();
    for (size_t i = 0; i < child_.size(); i++) {
      out << "\n  Child[" << i << "] id: " << child_[i]->id();
    }
    out << "\nNumber of parents      : " << parent_.size();
    for (size_t i = 0; i < parent_.size(); i++) {
      out << "\n  Parent[" << i << "] id: " << parent_[i]->id();
    }
    out << "\nConnector queue size   : " << oc_queue_size_ << "\nTotal repeats : " << op_total_repeats_
        << "\nNumber repeats per epoch : " << op_num_repeats_per_epoch_;
    if (sampler_) {
      out << "\nSampler:\n";
      sampler_->SamplerPrint(out, show_all);
    }
  }
}
255 
GetNextRowPullMode(TensorRow * const row)256 Status DatasetOp::GetNextRowPullMode(TensorRow *const row) {
257   RETURN_UNEXPECTED_IF_NULL(child_[0]);
258   return child_[0]->GetNextRowPullMode(row);
259 }
260 
261 // Gets the next row from the given child
GetNextRow(TensorRow * row,int32_t worker_id,bool retry_if_eoe)262 Status DatasetOp::GetNextRow(TensorRow *row, int32_t worker_id, bool retry_if_eoe) {
263   // pop is a blocked call and will throw an interruption if the whole group shuts down.
264   RETURN_IF_NOT_OK(out_connector_->PopWithRetry(static_cast<int>(worker_id), row, retry_if_eoe));
265   return Status::OK();
266 }
267 
268 // Gets the number of classes
GetNumClasses(int64_t * num_classes)269 Status DatasetOp::GetNumClasses(int64_t *num_classes) {
270   if (child_.size() == 1) {
271     return child_[0]->GetNumClasses(num_classes);
272   } else if (child_.size() > 1) {
273     // It is okay for dataset to have more than 1 child, GetNumClasses shouldn't fail in this case.
274     // This is done mostly for cache, which injects cache lookup/merge operators. Cache path will
275     // always be in front of the child_ structure, so we get num classes from the last child.
276     return child_[child_.size() - 1]->GetNumClasses(num_classes);
277   } else {
278     // when num classes isn't found, the default behavior is to return -1
279     MS_LOG(WARNING) << "Num classes not defined for : " << Name();
280     *num_classes = -1;
281     return Status::OK();
282   }
283 }
284 
GetClassIndexing(std::vector<std::pair<std::string,std::vector<int32_t>>> * output_class_indexing)285 Status DatasetOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
286   if (child_.size() == 1) {
287     return child_[0]->GetClassIndexing(output_class_indexing);
288   } else if (child_.size() > 1) {
289     // It is okay for dataset to have more than 1 child, GetClassIndexing shouldn't fail in this case.
290     // This is done mostly for cache, which injects cache lookup/merge operators. Cache path will
291     // always be in the front of the child_ structure, so we get data from the last child.
292     return child_[child_.size() - 1]->GetClassIndexing(output_class_indexing);
293   } else {
294     *output_class_indexing = {};
295     RETURN_STATUS_UNEXPECTED("Trying to get class index from leaf node, missing override.");
296   }
297 }
298 
299 // Performs handling for when an eoe message is received.
300 // The base class implementation simply flows the eoe message to output. Derived classes
301 // may override if they need to perform special eoe handling.
EoeReceived(int32_t worker_id)302 Status DatasetOp::EoeReceived(int32_t worker_id) { return out_connector_->SendEOE(worker_id); }
303 
304 // Performs handling for when an eof message is received.
305 // The base class implementation simply flows the eof message to output. Derived classes
306 // may override if they need to perform special eof handling.
EofReceived(int32_t worker_id)307 Status DatasetOp::EofReceived(int32_t worker_id) { return out_connector_->SendEOF(worker_id); }
308 
309 // During tree prepare phase, operators may have specific post-operations to perform depending on their role.
// During tree prepare phase, operators may have specific post-operations to perform depending on their role.
// Creates the output connector, registers it with the tree's task group,
// registers worker connectors, and computes the column name map — in that order.
Status DatasetOp::PrepareOperator() {
  // Creating Connector object for each op.
  // The consumer of the root node is assumed to be one thread.
  // If multiple threads are consuming from the root node, they will get the ordered data in round robin fashion.
  if (parent_.empty()) {
    this->CreateConnector(NumProducers(), 1);
  } else {
    // Size the connector by how many consumers our (first) parent will run.
    this->CreateConnector(NumProducers(), parent_[0]->NumConsumers());
  }
  // out_connector_ is null for ops that opted out of a connector (queue size 0).
  if (out_connector_) {
    RETURN_IF_NOT_OK(out_connector_->Register(tree_->AllTasks()));
  }
  RETURN_IF_NOT_OK(this->RegisterWorkerConnectors());

  // Generate the column name map for the current op.
  RETURN_IF_NOT_OK(this->ComputeColMap());

  return Status::OK();
}
329 
330 // Derived classes may implement the reset function if the operator is stateful and needs
331 // specific reset handling that is not contained in this common code version of the reset.
// Derived classes may implement the reset function if the operator is stateful and needs
// specific reset handling that is not contained in this common code version of the reset.
// The base version only flips the op state back to running.
Status DatasetOp::Reset() {
  state_ = OpState::kDeOpRunning;
  return Status::OK();
}
336 
337 // gives a string output for the column map for handy debug printing
ColumnNameMapAsString() const338 std::string DatasetOp::ColumnNameMapAsString() const {
339   std::string outStr = "Column name id map: ";
340   for (auto &it : column_name_id_map_) {
341     outStr += (" " + it.first + ":" + std::to_string(it.second));
342   }
343   return outStr;
344 }
345 
346 // Computing the assignment of the column name map.
347 // This just inherits the column map from its first child, can only be used if the number of children is 1.
348 // Operations changing the column map must overwrite this function.
ComputeColMap()349 Status DatasetOp::ComputeColMap() {
350   if (child_.size() > 1) {
351     RETURN_STATUS_UNEXPECTED("[Internal ERROR], no support for the relationship between operators is not one-to-one.");
352   }
353   if (column_name_id_map_.empty()) {
354     column_name_id_map_ = child_[0]->column_name_id_map();
355     if (column_name_id_map_.empty()) {
356       RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!");
357     }
358     MS_LOG(DEBUG) << "Setting column map:\n" << DatasetOp::ColumnNameMapAsString();
359   } else {
360     MS_LOG(WARNING) << "Column name map is already set!";
361   }
362   return Status::OK();
363 }
364 
365 // Getter for the sampler, and it also removes the sampler from the op
FetchRemoveSampler(std::shared_ptr<SamplerRT> * sampler)366 Status DatasetOp::FetchRemoveSampler(std::shared_ptr<SamplerRT> *sampler) {
367   *sampler = sampler_;  // It's okay if it sampler_ points to nullptr
368   sampler_.reset();     // clear our member-copy of this pointer.  We no longer have this sampler
369   return Status::OK();
370 }
371 
372 #ifndef ENABLE_ANDROID
// Generates a CRC over the printed form of the tree rooted at 'op', for use as
// a cache key.  Fields that vary between otherwise-identical pipelines (worker
// counts, host/port, row counts, ids, seeds, repeat counters, cache ids) are
// regex-stripped from the text first so equivalent pipelines hash equally.
// NOTE: the patterns below must track the labels emitted by the Print() methods.
uint32_t DatasetOp::GenerateCRC(const std::shared_ptr<DatasetOp> &op) {
  std::stringstream ss;
  op->tree_->Print(ss, op);
  std::string ss_str = ss.str();

  // Filter out the Num workers field when generating the check sum
  ss_str = std::regex_replace(ss_str, std::regex("Number of ShardReader workers.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("Num workers.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("\\[workers.*?\\]"), "");
  ss_str = std::regex_replace(ss_str, std::regex("Connector queue size.*\n"), "");

  // Filter out tcp/ip information
  ss_str = std::regex_replace(ss_str, std::regex("Hostname.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("Port.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("Number of rpc workers.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("Prefetch size.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("Local client support.*\n"), "");

  // Filter out Number of rows when generating the check sum
  ss_str = std::regex_replace(ss_str, std::regex("Number of rows.*\n"), "");

  // Filter out the Operator control flags field when generating the check sum
  ss_str = std::regex_replace(ss_str, std::regex("Operator control flags.*\n"), "");

  // Filter out the Device id field to allow cache sharing for a distributed run of the same pipeline
  ss_str = std::regex_replace(ss_str, std::regex("Device id.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("device_id.*\n"), "");

  // Filter out the operator id field (parent/child listings and the "(nn)" id prefix)
  ss_str = std::regex_replace(ss_str, std::regex(" *Parent.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex(" *Child.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex(R"(\(\s*\d+?\))"), "");

  // Doesn't matter whether there is any parent node above CacheOp or not.
  ss_str = std::regex_replace(ss_str, std::regex("Number of parents.*\n"), "");

  // Filter out shuffle seed from ShuffleOp
  ss_str = std::regex_replace(ss_str, std::regex("Shuffle seed.*\n"), "");

  // Filter out the total repeats and number repeats per epoch field
  ss_str = std::regex_replace(ss_str, std::regex("Total repeats.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("Number repeats per epoch.*\n"), "");

  // The Cache crc and Server cache id field is different when creating new cache_client and re-using the same
  // cache_client later. So we filter out these two fields to allow cache sharing.
  ss_str = std::regex_replace(ss_str, std::regex("Cache crc.*\n"), "");
  ss_str = std::regex_replace(ss_str, std::regex("Server cache id.*\n"), "");

  MS_LOG(DEBUG) << "Printing the tree for generating crc:\n" << ss_str;

  uint32_t cache_crc = system::Crc32c::GetMaskCrc32cValue(ss_str.c_str(), ss_str.length());
  return cache_crc;
}
426 #endif
427 
UpdateRepeatAndEpochCounter()428 void DatasetOp::UpdateRepeatAndEpochCounter() {
429   op_current_repeats_++;
430   if (op_current_repeats_ % op_num_repeats_per_epoch_ == 0) op_current_epochs_++;
431   MS_LOG(DEBUG) << Name() << " current repeats: " << op_current_repeats_ << ", current epochs: " << op_current_epochs_;
432 }
433 
GetTreeBatchSize()434 int64_t DatasetOp::GetTreeBatchSize() {
435   if (child_.size() == 1) {
436     return child_[0]->GetTreeBatchSize();
437   } else if (child_.size() > 1) {
438     // It is okay for dataset to have more than 1 child, GetBatchSize shouldn't fail in this case.
439     // This is done mostly for cache, which injects cache lookup/merge operators. Cache path will
440     // always be in front of the child_ structure, so we get data from the last child.
441     return child_[child_.size() - 1]->GetTreeBatchSize();
442   } else {
443     return 1;
444   }
445 }
446 
GetTreeRepeatCount()447 int64_t DatasetOp::GetTreeRepeatCount() {
448   if (child_.size() == 1) {
449     return child_[0]->GetTreeRepeatCount();
450   } else if (child_.size() > 1) {
451     // It is okay for dataset to have more than 1 child, GetRepeatCount shouldn't fail in this case.
452     // This is done mostly for cache, which injects cache lookup/merge operators. Cache path will
453     // always be in front of the child_ structure, so we get data from the last child.
454     return child_[child_.size() - 1]->GetTreeRepeatCount();
455   } else {
456     return 1;
457   }
458 }
459 }  // namespace dataset
460 }  // namespace mindspore
461