/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_TREE_ADAPTER_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_TREE_ADAPTER_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "minddata/dataset/engine/execution_tree.h"
#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
#include "minddata/dataset/engine/perf/dataset_iterator_tracing.h"

namespace mindspore {
namespace dataset {
class DatasetNode;

class TreeAdapter {
 public:
  // This flag indicates the purpose for which this tree adapter was created (the type of the tree_consumer).
  // Currently there are 3 types of consumers: Iterator, Getter and TDT/Vocab/Save ...
  // To avoid premature optimization, the last type (TDT/Vocab/Save) is treated as Iterator for now.
  enum UsageFlag { kDeIterator = 0, kDeGetter = 1 };

  explicit TreeAdapter(UsageFlag flag = kDeIterator);

  ~TreeAdapter() = default;

  // This function performs syntax checking, semantics checking, and optimization, and then builds
  // the Execution tree.
  Status Compile(std::shared_ptr<DatasetNode> root_ir, int32_t num_epochs = -1);

  // Return the root node of the IR tree cloned from the parsed IR tree
  std::shared_ptr<DatasetNode> RootIRNode() const { return root_ir_; }

  // This is the main method TreeConsumer uses to interact with TreeAdapter
  // 1. GetNext will Launch() the ExeTree on its first call by the iterator (the tree is already prepared)
  // 2. GetNext will return an empty row when eoe/eof is reached
  Status GetNext(TensorRow *row);
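  // Illustrative usage sketch (not part of this header): an Iterator-type consumer
  // typically drives the adapter as follows. `my_root_ir` is a hypothetical parsed
  // IR tree, and error handling mirrors the codebase's RETURN_IF_NOT_OK macro.
  //
  //   TreeAdapter adapter(TreeAdapter::kDeIterator);
  //   RETURN_IF_NOT_OK(adapter.Compile(my_root_ir, /*num_epochs=*/1));
  //   TensorRow row;
  //   RETURN_IF_NOT_OK(adapter.GetNext(&row));    // first call launches the ExeTree
  //   while (!row.empty()) {                      // empty row signals eoe/eof
  //     /* consume the row */
  //     RETURN_IF_NOT_OK(adapter.GetNext(&row));
  //   }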
  // unique_ptr overloads operator bool(); it returns false if it doesn't manage an object
  std::weak_ptr<DatasetOp> GetRoot() { return tree_ ? tree_->root() : nullptr; }

  // This function returns the column_name_map once Compile() has been called
  std::unordered_map<std::string, int32_t> GetColumnNameMap() const { return column_name_map_; }

  // This function returns the TaskGroup associated with the ExeTree. It is needed by DeviceQueueConsumer
  // to be able to launch a thread. Compile() needs to be called before this function.
  TaskGroup *const AllTasks() const { return tree_ ? tree_->AllTasks() : nullptr; }

  Status Launch();

  // Set the optional optimization pass
  void SetOptimize(bool value) { optimize_ = value; }

  // Status of the optional optimization pass
  bool OptimizationEnabled() const { return optimize_; }

 private:
  // Run the mandatory pass checking the syntax and semantics of the IR tree
  Status PrePass(std::shared_ptr<DatasetNode> ir);

  // Run the optional optimization pass on the IR tree
  Status Optimize(std::shared_ptr<DatasetNode> ir);

  // Run the mandatory pass augmenting the IR tree
  Status PostPass(std::shared_ptr<DatasetNode> ir);

  // Build an Execution tree
  Status Build(std::shared_ptr<DatasetNode> root_ir);

  // This RECURSIVE function walks the (optimized) IR tree in DFS order to build its corresponding Execution tree.
  Status BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *op);

  std::unordered_map<std::string, int32_t> column_name_map_;
  std::shared_ptr<DatasetNode> root_ir_;
  std::unique_ptr<ExecutionTree> tree_;  // the Execution tree built from the optimized IR
  bool optimize_;                        // flag to enable the optional optimization pass
#ifndef ENABLE_SECURITY
  std::shared_ptr<DatasetIteratorTracing> tracing_;  // trace profiling data
#endif
  int32_t cur_batch_num_;           // current batch number, used for profiling
  int32_t cur_connector_size_;      // current connector size of root op, used for profiling
  int32_t cur_connector_capacity_;  // current connector capacity of root op, used for profiling
  UsageFlag usage_;                 // usage of this tree adapter (type of consumer)
  bool launched_;                   // whether the Execution tree has been launched
  // State flags for the lifecycle of the tree
  enum CompileState {
    kCompileStateInit = 0,      // The freshly initialized state
    kCompileStateIRGraphBuilt,  // User code has been parsed and its IR graph built
    kCompileStateIRTreeCloned,  // IR tree has been cloned from the IR graph
    kCompileStateOptimized,     // IR tree has been optimized
    kCompileStateReady          // Execution tree is generated from the optimized IR
  };
  CompileState tree_state_;
};
}  // namespace dataset
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_TREE_ADAPTER_H_