• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASET_ITERATOR_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASET_ITERATOR_H_
18 
19 #include <memory>
20 #include <string>
21 #include <unordered_map>
22 #include <utility>
23 #include <vector>
24 
25 #include "minddata/dataset/core/tensor.h"
26 #include "minddata/dataset/engine/datasetops/dataset_op.h"
27 #include "minddata/dataset/engine/execution_tree.h"
28 #include "minddata/dataset/engine/perf/dataset_iterator_tracing.h"
29 #include "minddata/dataset/util/status.h"
30 
31 namespace mindspore {
32 namespace dataset {
33 using TensorMap = std::unordered_map<std::string, std::shared_ptr<Tensor>>;  // column name -> shared pointer to Tensor
34 
35 // Forward declaration; the DatasetIterator constructor below takes a std::shared_ptr<ExecutionTree>.
36 class ExecutionTree;
37 
38 // DatasetIterator is for fetching rows off the end/root of the execution tree (no base class is declared here).
39 class DatasetIterator {
40  public:
41   // Constructor of the DatasetIterator
42   // @param exe_tree The execution tree we want to pull/iterate the data from, using its root node.
43   explicit DatasetIterator(std::shared_ptr<ExecutionTree> exe_tree);
44 
45   // Destructor
46   ~DatasetIterator();
47 
48   // Getter
49   // @return The column name (string) to column id mapping, returned by value.
50   std::unordered_map<std::string, int32_t> GetColumnNameMap() const;
51 
EofHandled()52   bool EofHandled() const { return eof_handled_; }  // @return the current value of eof_handled_
53 
54   // Fetches one row of data from the iterator.
55   // The original note says the base class version simply performs error handling and returns an empty row, with the
56   // actual functionality in derived versions. NOTE(review): no base/derived relationship is visible here - confirm.
57   // @param out_row - A TensorRow (vector of shared pointers to Tensors).  If any of the end-of-data
58   // messages are encountered (such as eoe or eof), then an empty TensorRow is returned back.
59   // @return Status The status code returned
60   // @note The position of a Tensor/column might be different from the initial column order
61   // in the corresponding Dataset Op. Users must be aware that MapOp, ZipOp, and others might change
62   // the column ordering.
63   Status FetchNextTensorRow(TensorRow *out_row);
64 
65   // Fetches one row of data from the iterator as a column map.
66   // @param out_map - Output: an unordered map from column name to shared pointer to Tensor. @return Status code.
67   Status GetNextAsMap(TensorMap *out_map);
68 
69  private:
70   std::shared_ptr<DatasetOp> root_;  // saves the root of the executionTree
71   TensorRow device_queue_row_;  // NOTE(review): usage is not visible in this header; see the implementation file
72 #ifndef ENABLE_SECURITY
73   std::shared_ptr<DatasetIteratorTracing> tracing_;  // trace profiling data (only when ENABLE_SECURITY is undefined)
74 #endif
75   int32_t cur_batch_num_;           // current batch number, used for profiling
76   int32_t cur_connector_size_;      // current connector size of root op, used for profiling
77   int32_t cur_connector_capacity_;  // current connector capacity of root op, used for profiling
78   bool eof_handled_;                // T/F if this op got an eof; exposed through EofHandled()
79   std::unordered_map<std::string, int32_t> col_name_id_map_;  // column name -> column id (presumably what GetColumnNameMap() returns)
80   std::vector<std::pair<std::string, int32_t>> column_order_;  // key: column name, val: column id
81 };
82 
83 // ChildIterator is for fetching rows from intermediate nodes of the execution tree.
84 // This one should only be used by internal Dataset operators, rather than an end-user.
85 class ChildIterator {
86  public:
87   // Constructor of the ChildIterator
88   // @param current_op - The parent op; rows are fetched from its children.
89   // @param worker_id - The worker id to use when fetching from the children.
90   // @param child_idx - The index of the child to fetch from.
91   ChildIterator(DatasetOp *current_op, int32_t worker_id, int32_t child_idx);
92 
93   // Destructor
94   ~ChildIterator();
95 
96   // Fetches one row of data from the iterator, only from the child/worker id given to the constructor.
97   // NOTE(review): the original comment said "Overrides the base class", but no base class is declared here - confirm.
98   // @param out_row - A TensorRow (vector of shared pointers to Tensors).  If any of the end-of-data
99   // messages are encountered (such as eoe or eof), then an empty TensorRow is returned back.
100   // @return Status The status code returned
101   Status FetchNextTensorRow(TensorRow *out_row);
102 
103   // This function drains the buffer until the next eoe has been received.
104   // It will be a no-op if the previous row returned is empty.
105   // @return Status The status code returned
106   Status Drain();
107 
108   // Getter
109   // @return The column name (string) to column id mapping, returned by value.
110   std::unordered_map<std::string, int32_t> GetColumnNameMap() const;
111 
112   // @return T/F if end of epoch (the end_epoch_ flag)
EndOfEpoch()113   bool EndOfEpoch() { return end_epoch_; }  // NOTE(review): not const-qualified, unlike EofHandled() below
114 
115   // Getter
116   // @return T/F if this iterator is completely done after getting an eof
EofHandled()117   bool EofHandled() const { return eof_handled_; }
118 
119  private:
120   DatasetOp *current_op_;  // The parent operator. We consume from its children.
121   int32_t child_idx_;      // The specific child this iterator will fetch from.
122   int32_t worker_id_;      // The worker id used for fetching the child data.
123   bool end_epoch_;         // the flag used when an empty row has been returned.
124   bool eof_handled_;       // T/F if this op got an eof
125 };
126 }  // namespace dataset
127 }  // namespace mindspore
128 
129 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASET_ITERATOR_H_
130