• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_GENERATOR_NODE_H_
18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_GENERATOR_NODE_H_
19 
#include <memory>
#include <string>
#include <utility>
#include <vector>
23 
24 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
25 #include "minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h"
26 #include "minddata/dataset/engine/ir/datasetops/repeat_node.h"
27 #include "minddata/dataset/util/status.h"
28 
29 namespace mindspore {
30 namespace dataset {
31 /// \class GeneratorNode
32 /// \brief A Dataset derived class to represent GeneratorNode dataset
33 class GeneratorNode : public MappableSourceNode {
34  public:
35   /// \brief Constructor
36   GeneratorNode(py::function generator_function, const std::vector<std::string> &column_names,
37                 const std::vector<DataType> &column_types, int64_t source_len, std::shared_ptr<SamplerObj> sampler,
38                 uint32_t num_parallel_workers);
39 
40   /// \brief Constructor
41   GeneratorNode(py::function generator_function, const std::shared_ptr<SchemaObj> &schema, int64_t source_len,
42                 std::shared_ptr<SamplerObj> sampler, uint32_t num_parallel_workers);
43 
44   /// \brief Destructor
45   ~GeneratorNode() = default;
46 
47   /// \brief Node name getter
48   /// \return Name of the current node
Name()49   std::string Name() const override { return kGeneratorNode; }
50 
51   /// \brief Print the description
52   /// \param out - The output stream to write output to
53   void Print(std::ostream &out) const override;
54 
55   /// \brief Copy the node to a new object
56   /// \return A shared pointer to the new copy
57   std::shared_ptr<DatasetNode> Copy() override;
58 
59   /// \brief a base class override function to create the required runtime dataset op objects for this class
60   /// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create
61   /// \return Status Status::OK() if build successfully
62   Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override;
63 
64   /// \brief Parameters validation
65   /// \return Status Status::OK() if all the parameters are valid
66   Status ValidateParams() override;
67 
68   /// \brief Get the shard id of node, is always 0 because generator_node doesn't support sharding
69   /// \return Status Status::OK() if get shard id successfully
70   Status GetShardId(int32_t *shard_id) override;
71 
IsSizeDefined()72   bool IsSizeDefined() override { return false; }
73 
74   /// \brief Record the vector of Repeat/EpochCtrl nodes that are ancestors of this node
75   /// \param[in] the ancestor node
76   /// \return Status of the function
AddResetAncestor(const std::shared_ptr<RepeatNode> & src)77   Status AddResetAncestor(const std::shared_ptr<RepeatNode> &src) {
78     CHECK_FAIL_RETURN_UNEXPECTED(reset_ancestor_ == nullptr, "Internal error: Overwriting an existing value");
79     reset_ancestor_ = src;
80     return Status::OK();
81   }
82   /// Returns the dataset size of GeneratorOp. If is mappable (sampler isn not null), the sampler is used.
83   /// Otherwise, a dry run is needed.
84   /// \param[in] size_getter TreeConsumer to be used for a dryrun
85   /// \param[in] estimate
86   /// \param[out] dataset_size
87   /// \return Status of the function
88   Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
89                         int64_t *dataset_size) override;
90 
91   /// \brief Getter functions
GeneratorFunction()92   const py::function &GeneratorFunction() const { return generator_function_; }
ColumnNames()93   const std::vector<std::string> &ColumnNames() const { return column_names_; }
ColumnTypes()94   const std::vector<DataType> &ColumnTypes() const { return column_types_; }
Schema()95   const std::shared_ptr<SchemaObj> &Schema() const { return schema_; }
96 
97   /// \brief Sampler getter
98   /// \return SamplerObj of the current node
Sampler()99   std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }
100 
101   /// \brief Sampler setter
SetSampler(std::shared_ptr<SamplerObj> sampler)102   void SetSampler(std::shared_ptr<SamplerObj> sampler) override { sampler_ = sampler; }
103 
104  private:
105   py::function generator_function_;
106   std::vector<std::string> column_names_;
107   std::vector<DataType> column_types_;
108   std::shared_ptr<SchemaObj> schema_;
109   std::shared_ptr<RepeatNode> reset_ancestor_;  // updated its immediate Repeat/EpochCtrl ancestor in GeneratorNodePass
110   std::shared_ptr<SamplerObj> sampler_;
111   uint32_t num_parallel_workers_;
112   int64_t source_len_;  // Length of the dataset source provided by the user, -1 means it's unknown
113 
114   /// \brief Base-class override for accepting IRNodePass visitor
115   /// \param[in] p The node to visit
116   /// \param[out] modified Indicator if the node was modified
117   /// \return Status of the node visit
118   Status Accept(IRNodePass *p, bool *const modified) override;
119 
120   /// \brief Base-class override for accepting IRNodePass visitor
121   /// \param[in] p The node to visit
122   /// \param[out] modified Indicator if the node was modified
123   /// \return Status of the node visit
124   Status AcceptAfter(IRNodePass *p, bool *const modified) override;
125 };
126 
127 }  // namespace dataset
128 }  // namespace mindspore
129 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_GENERATOR_NODE_H_
130