/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_GENERATOR_NODE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_GENERATOR_NODE_H_

#include <memory>
#include <string>
#include <vector>

#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
#include "minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h"
#include "minddata/dataset/engine/ir/datasetops/repeat_node.h"
#include "minddata/dataset/util/status.h"

namespace mindspore {
namespace dataset {
/// \class GeneratorNode
/// \brief A Dataset derived class to represent GeneratorNode dataset
/// \note IR node wrapping a Python generator function (py::function) as a mappable dataset source.
class GeneratorNode : public MappableSourceNode {
 public:
  /// \brief Constructor for a generator whose output columns are described by explicit names and types
  /// \param[in] generator_function The Python callable that produces the dataset rows
  /// \param[in] column_names Names of the columns the generator yields
  /// \param[in] column_types Types of the columns the generator yields (parallel to column_names)
  /// \param[in] source_len Length of the source; -1 means unknown (see source_len_ below)
  /// \param[in] sampler Sampler used to select rows; may be null (see GetDatasetSize)
  /// \param[in] num_parallel_workers Number of workers used by the runtime op built from this node
  GeneratorNode(py::function generator_function, const std::vector<std::string> &column_names,
                const std::vector<DataType> &column_types, int64_t source_len, std::shared_ptr<SamplerObj> sampler,
                uint32_t num_parallel_workers);

  /// \brief Constructor for a generator whose output columns are described by a schema object
  /// \param[in] generator_function The Python callable that produces the dataset rows
  /// \param[in] schema Schema describing the generated columns
  /// \param[in] source_len Length of the source; -1 means unknown (see source_len_ below)
  /// \param[in] sampler Sampler used to select rows; may be null (see GetDatasetSize)
  /// \param[in] num_parallel_workers Number of workers used by the runtime op built from this node
  GeneratorNode(py::function generator_function, const std::shared_ptr<SchemaObj> &schema, int64_t source_len,
                std::shared_ptr<SamplerObj> sampler, uint32_t num_parallel_workers);

  /// \brief Destructor
  ~GeneratorNode() = default;

  /// \brief Node name getter
  /// \return Name of the current node
  std::string Name() const override { return kGeneratorNode; }

  /// \brief Print the description
  /// \param out - The output stream to write output to
  void Print(std::ostream &out) const override;

  /// \brief Copy the node to a new object
  /// \return A shared pointer to the new copy
  std::shared_ptr<DatasetNode> Copy() override;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create
  /// \return Status Status::OK() if build successfully
  Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override;

  /// \brief Parameters validation
  /// \return Status Status::OK() if all the parameters are valid
  Status ValidateParams() override;

  /// \brief Get the shard id of node, is always 0 because generator_node doesn't support sharding
  /// \param[out] shard_id The shard id of this node
  /// \return Status Status::OK() if get shard id successfully
  Status GetShardId(int32_t *shard_id) override;

  /// \brief Always false: a generator's size is not statically defined (source_len_ may be -1/unknown)
  bool IsSizeDefined() override { return false; }

  /// \brief Record the vector of Repeat/EpochCtrl nodes that are ancestors of this node
  /// \param[in] src The ancestor node
  /// \return Status of the function; fails if an ancestor has already been recorded
  Status AddResetAncestor(const std::shared_ptr<RepeatNode> &src) {
    // Only one reset ancestor may ever be set; overwriting indicates a pass-ordering bug.
    CHECK_FAIL_RETURN_UNEXPECTED(reset_ancestor_ == nullptr, "Internal error: Overwriting an existing value");
    reset_ancestor_ = src;
    return Status::OK();
  }

  /// Returns the dataset size of GeneratorOp. If it is mappable (sampler is not null), the sampler is used.
  /// Otherwise, a dry run is needed.
  /// \param[in] size_getter TreeConsumer to be used for a dryrun
  /// \param[in] estimate If true, an estimated size may be returned instead of an exact one — TODO confirm semantics
  /// \param[out] dataset_size The computed number of rows
  /// \return Status of the function
  Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
                        int64_t *dataset_size) override;

  /// \brief Getter functions
  const py::function &GeneratorFunction() const { return generator_function_; }
  const std::vector<std::string> &ColumnNames() const { return column_names_; }
  const std::vector<DataType> &ColumnTypes() const { return column_types_; }
  const std::shared_ptr<SchemaObj> &Schema() const { return schema_; }

  /// \brief Sampler getter
  /// \return SamplerObj of the current node
  std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }

  /// \brief Sampler setter
  void SetSampler(std::shared_ptr<SamplerObj> sampler) override { sampler_ = sampler; }

 private:
  py::function generator_function_;        // Python callable that yields the dataset rows
  std::vector<std::string> column_names_;  // output column names (used with column_types_)
  std::vector<DataType> column_types_;     // output column types (parallel to column_names_)
  std::shared_ptr<SchemaObj> schema_;      // alternative column description (set by the schema constructor)
  std::shared_ptr<RepeatNode> reset_ancestor_;  // updated its immediate Repeat/EpochCtrl ancestor in GeneratorNodePass
  std::shared_ptr<SamplerObj> sampler_;
  uint32_t num_parallel_workers_;
  int64_t source_len_;  // Length of the dataset source provided by the user, -1 means it's unknown

  /// \brief Base-class override for accepting IRNodePass visitor
  /// \param[in] p The node to visit
  /// \param[out] modified Indicator if the node was modified
  /// \return Status of the node visit
  Status Accept(IRNodePass *p, bool *const modified) override;

  /// \brief Base-class override for accepting IRNodePass visitor
  /// \param[in] p The node to visit
  /// \param[out] modified Indicator if the node was modified
  /// \return Status of the node visit
  Status AcceptAfter(IRNodePass *p, bool *const modified) override;
};

}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_GENERATOR_NODE_H_