/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_MINDDATA_NODE_H_
18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_MINDDATA_NODE_H_
19 
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <vector>
24 
25 #include "minddata/dataset/engine/datasetops/source/mindrecord_op.h"
26 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
27 
28 namespace mindspore {
29 namespace dataset {
30 
31 class MindDataNode : public MappableSourceNode {
32  public:
33   /// \brief Constructor
34   MindDataNode(const std::vector<std::string> &dataset_files, const std::vector<std::string> &columns_list,
35                const std::shared_ptr<SamplerObj> &sampler, nlohmann::json padded_sample, int64_t num_padded,
36                ShuffleMode shuffle_mode = ShuffleMode::kGlobal, std::shared_ptr<DatasetCache> cache = nullptr);
37 
38   /// \brief Constructor
39   MindDataNode(const std::string &dataset_file, const std::vector<std::string> &columns_list,
40                const std::shared_ptr<SamplerObj> &sampler, nlohmann::json padded_sample, int64_t num_padded,
41                ShuffleMode shuffle_mode = ShuffleMode::kGlobal, std::shared_ptr<DatasetCache> cache = nullptr);
42 
43   /// \brief Destructor
44   ~MindDataNode() = default;
45 
46   /// \brief Node name getter
47   /// \return Name of the current node
Name()48   std::string Name() const override { return kMindDataNode; }
49 
50   /// \brief Print the description
51   /// \param out - The output stream to write output to
52   void Print(std::ostream &out) const override;
53 
54   /// \brief Copy the node to a new object
55   /// \return A shared pointer to the new copy
56   std::shared_ptr<DatasetNode> Copy() override;
57 
58   /// \brief a base class override function to create the required runtime dataset op objects for this class
59   /// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create
60   /// \return Status Status::OK() if build successfully
61   Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override;
62 
63   /// \brief Parameters validation
64   /// \return Status Status::OK() if all the parameters are valid
65   Status ValidateParams() override;
66 
67   /// \brief Get the shard id of node
68   /// \return Status Status::OK() if get shard id successfully
69   Status GetShardId(int32_t *shard_id) override;
70 
71   /// \brief Build sampler chain for minddata dataset
72   /// \return Status Status::OK() if input sampler is valid
73   Status BuildMindDatasetSamplerChain(const std::shared_ptr<SamplerObj> &sampler,
74                                       std::vector<std::shared_ptr<mindrecord::ShardOperator>> *operators_,
75                                       int64_t num_padded, ShuffleMode shuffle_mode);
76 
77   /// \brief Set sample_bytes when padded_sample has py::byte value
78   /// \note Pybind will use this function to set sample_bytes into MindDataNode
79   void SetSampleBytes(std::map<std::string, std::string> *sample_bytes);
80 
81   /// \brief Base-class override for GetDatasetSize
82   /// \param[in] size_getter Shared pointer to DatasetSizeGetter
83   /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
84   ///     dataset size at the expense of accuracy.
85   /// \param[out] dataset_size the size of the dataset
86   /// \return Status of the function
87   Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
88                         int64_t *dataset_size) override;
89 
90   /// \brief Sampler getter
91   /// \return SamplerObj of the current node
Sampler()92   std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }
93 
94   /// \brief Sampler setter
SetSampler(std::shared_ptr<SamplerObj> sampler)95   void SetSampler(std::shared_ptr<SamplerObj> sampler) override { sampler_ = sampler; }
96 
97   /// \brief Base-class override for accepting IRNodePass visitor
98   /// \param[in] p The node to visit
99   /// \param[out] modified Indicator if the node was modified
100   /// \return Status of the node visit
101   Status Accept(IRNodePass *const p, bool *const modified) override;
102 
103   /// \brief Base-class override for accepting IRNodePass visitor
104   /// \param[in] p The node to visit
105   /// \param[out] modified Indicator if the node was modified
106   /// \return Status of the node visit
107   Status AcceptAfter(IRNodePass *const p, bool *const modified) override;
108 
109  private:
110   std::string dataset_file_;                // search_for_pattern_ will be true in this mode
111   std::vector<std::string> dataset_files_;  // search_for_pattern_ will be false in this mode
112   bool search_for_pattern_;
113   std::vector<std::string> columns_list_;
114   std::shared_ptr<SamplerObj> input_sampler_;  // The sampler from users input, will be used to create a set of shard
115                                                // operators.
116   std::shared_ptr<SamplerObj> sampler_;        // An auto-created sampler, IR of runtime MindRecordSamplerRT sampler
117   nlohmann::json padded_sample_;
118   std::map<std::string, std::string> sample_bytes_;  // enable in python
119   int64_t num_padded_;
120   std::vector<std::shared_ptr<ShardOperator>> operators_;
121   ShuffleMode shuffle_mode_;
122 };
123 
124 }  // namespace dataset
125 }  // namespace mindspore
126 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_MINDDATA_NODE_H_
127