• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_TREE_ADAPTER_H_
18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_TREE_ADAPTER_H_
19 
20 #include <memory>
21 #include <string>
22 #include <unordered_map>
23 #include <utility>
24 #include <vector>
25 
26 #include "minddata/dataset/engine/execution_tree.h"
27 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
28 #include "minddata/dataset/engine/perf/dataset_iterator_tracing.h"
29 
30 namespace mindspore {
31 namespace dataset {
32 class DatasetNode;
33 
34 class TreeAdapter {
35  public:
36   // this flag is used to indicate the purpose of the creation of this tree adapter (type of the tree_consumer).
37   // Currently there are 3 types of consumer, Iterator, Getter and TDT/Vocab/Save ...
38   // To avoid premature optimization, the last type (TDT/Vocab/Save) is regarded as Iterator for now.
39   enum UsageFlag { kDeIterator = 0, kDeGetter = 1 };
40 
41   explicit TreeAdapter(UsageFlag flag = kDeIterator);
42 
43   ~TreeAdapter() = default;
44 
45   // This function performs syntax checking, semantics checking, optimizes, and then builds
46   // the Execution tree.
47   Status Compile(std::shared_ptr<DatasetNode> root_ir, int32_t num_epochs = -1);
48 
49   // Return the root node of the IR after cloned from the parsed IR tree
RootIRNode()50   std::shared_ptr<DatasetNode> RootIRNode() const { return root_ir_; }
51 
52   // This is the main method TreeConsumer uses to interact with TreeAdapter
53   // 1. GetNext will Launch() the ExeTree on its first call by iterator (tree is already prepared)
54   // 2. GetNext will return empty row when eoe/eof is obtained
55   Status GetNext(TensorRow *);
56 
57   // unique_ptr overloads operator bool(), will return false if it doesn't manage an object
GetRoot()58   std::weak_ptr<DatasetOp> GetRoot() { return tree_ ? tree_->root() : nullptr; }
59 
60   // This function will return the column_name_map once BuildAndPrepare() is called
GetColumnNameMap()61   std::unordered_map<std::string, int32_t> GetColumnNameMap() const { return column_name_map_; }
62 
63   // This function returns the TaskGroup associated with ExeTree. This is needed by DeviceQueueConsumer
64   // to be able to launch a thread. BuildAndPrepare needs to be called before this function
AllTasks()65   TaskGroup *const AllTasks() const { return tree_ ? tree_->AllTasks() : nullptr; }
66 
67   Status Launch();
68 
69   // Set optional optimization pass
SetOptimize(bool value)70   void SetOptimize(bool value) { optimize_ = value; }
71 
72   // Optional optimizations status
OptimizationEnabled()73   bool OptimizationEnabled() const { return optimize_; }
74 
75  private:
76   // Run the mandatory pass checking the syntax and semantics of the IR tree
77   Status PrePass(std::shared_ptr<DatasetNode> ir);
78 
79   // Run the optional optimization pass on the IR tree
80   Status Optimize(std::shared_ptr<DatasetNode> ir);
81 
82   // Run the mandatory pass augmenting the IR tree
83   Status PostPass(std::shared_ptr<DatasetNode> ir);
84 
85   // Build an Execution tree
86   Status Build(std::shared_ptr<DatasetNode> root_ir);
87 
88   // This RECURSIVE function walks the (optimized) IR tree in DFS to build its corresponding Execution tree.
89   Status BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *op);
90 
91   std::unordered_map<std::string, int32_t> column_name_map_;
92   std::shared_ptr<DatasetNode> root_ir_;
93   std::unique_ptr<ExecutionTree> tree_;  // current connector capacity of root op, used for profiling
94   bool optimize_;                        // Flag to enable optional optimization pass
95 #ifndef ENABLE_SECURITY
96   std::shared_ptr<DatasetIteratorTracing> tracing_;  // trace profiling data
97 #endif
98   int32_t cur_batch_num_;           // current batch number, used for profiling
99   int32_t cur_connector_size_;      // current connector size of root op, used for profiling
100   int32_t cur_connector_capacity_;  // current connector capacity of root op, used for profiling
101   UsageFlag usage_;                 // usage of this tree adapter (type of consumer)
102   bool launched_;
103   // State flags for the lifecycle of the tree
104   enum CompileState {
105     kCompileStateInit = 0,      // The freshly initialized state
106     kCompileStateIRGraphBuilt,  // User code has been parsed and its IR graph built
107     kCompileStateIRTreeCloned,  // IR tree has been cloned from the IR graph
108     kCompileStateOptimized,     // IR tree has been optimized
109     kCompileStateReady          // Execution tree is generated from the optimized IR
110   };
111   CompileState tree_state_;
112 };
113 }  // namespace dataset
114 }  // namespace mindspore
115 
116 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_TREE_ADAPTER_H_
117