• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2024 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_DATASET_NODE_H_
18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_DATASET_NODE_H_
19 
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <unordered_set>
24 #include <vector>
25 
26 #include "minddata/dataset/core/config_manager.h"
27 #include "minddata/dataset/engine/consumers/tree_consumer.h"
28 #include "minddata/dataset/engine/data_schema.h"
29 #include "minddata/dataset/engine/datasetops/dataset_op.h"
30 #include "minddata/dataset/engine/datasetops/filter_op.h"
31 #include "minddata/dataset/engine/datasetops/map_op/map_op.h"
32 #include "minddata/dataset/engine/datasetops/project_op.h"
33 #include "minddata/dataset/engine/datasetops/repeat_op.h"
34 #include "minddata/dataset/engine/datasetops/shuffle_op.h"
35 #include "minddata/dataset/engine/datasetops/skip_op.h"
36 #include "minddata/dataset/engine/datasetops/take_op.h"
37 #include "minddata/dataset/engine/ir/cache/dataset_cache.h"
38 #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"
39 #include "minddata/dataset/include/dataset/datasets.h"
40 #include "minddata/dataset/kernels/ir/validators.h"
41 #include "minddata/dataset/util/path.h"
42 #include "minddata/dataset/util/status.h"
43 #include "minddata/dataset/util/validators.h"
44 
45 namespace mindspore {
46 namespace dataset {
47 
48 class Dataset;
49 class DatasetCache;
50 class SamplerObj;
51 class IRNodePass;
52 class DatasetSizeGetter;
53 
// Display names of the non-leaf IR nodes (operators that consume the output of other nodes).
constexpr char kBatchNode[] = "Batch";
constexpr char kBucketBatchByLengthNode[] = "BucketBatchByLength";
constexpr char kBuildSentencePieceVocabNode[] = "BuildSentencePieceVocab";
constexpr char kBuildVocabNode[] = "BuildVocab";
constexpr char kCacheLookupNode[] = "CacheLookup";
constexpr char kCacheMergeNode[] = "CacheMerge";
constexpr char kCacheNode[] = "Cache";
constexpr char kConcatNode[] = "Concat";
constexpr char kEpochCtrlNode[] = "EpochCtrl";
constexpr char kFilterNode[] = "Filter";
constexpr char kMapNode[] = "Map";
constexpr char kProjectNode[] = "Project";
constexpr char kRenameNode[] = "Rename";
constexpr char kRepeatNode[] = "Repeat";
constexpr char kRootNode[] = "Top";  // note: the root node is named "Top", not "Root"
constexpr char kShuffleNode[] = "Shuffle";
constexpr char kSkipNode[] = "Skip";
constexpr char kSyncWaitNode[] = "SyncWait";
constexpr char kTakeNode[] = "Take";
constexpr char kTransferNode[] = "Transfer";
constexpr char kZipNode[] = "Zip";

// Display names of the leaf IR nodes (dataset sources); each one carries the "Dataset" suffix.
constexpr char kAGNewsNode[] = "AGNewsDataset";
constexpr char kAlbumNode[] = "AlbumDataset";
constexpr char kAmazonReviewNode[] = "AmazonReviewDataset";
constexpr char kCaltech256Node[] = "Caltech256Dataset";
constexpr char kCelebANode[] = "CelebADataset";
constexpr char kCifar100Node[] = "Cifar100Dataset";
constexpr char kCifar10Node[] = "Cifar10Dataset";
constexpr char kCityscapesNode[] = "CityscapesDataset";
constexpr char kCLUENode[] = "CLUEDataset";
constexpr char kCMUArcticNode[] = "CMUArcticDataset";
constexpr char kCocoNode[] = "CocoDataset";
constexpr char kCoNLL2000Node[] = "CoNLL2000Dataset";
constexpr char kCSVNode[] = "CSVDataset";
constexpr char kDBpediaNode[] = "DBpediaDataset";
constexpr char kDIV2KNode[] = "DIV2KDataset";
constexpr char kEMnistNode[] = "EMnistDataset";
constexpr char kEnWik9Node[] = "EnWik9Dataset";
constexpr char kFakeImageNode[] = "FakeImageDataset";
constexpr char kFashionMnistNode[] = "FashionMnistDataset";
constexpr char kFlickrNode[] = "FlickrDataset";
constexpr char kFood101Node[] = "Food101Dataset";
constexpr char kGeneratorNode[] = "GeneratorDataset";
constexpr char kGTZANNode[] = "GTZANDataset";
constexpr char kImageFolderNode[] = "ImageFolderDataset";
constexpr char kIMDBNode[] = "IMDBDataset";
constexpr char kIWSLT2016Node[] = "IWSLT2016Dataset";
constexpr char kIWSLT2017Node[] = "IWSLT2017Dataset";
constexpr char kKITTINode[] = "KITTIDataset";
constexpr char kKMnistNode[] = "KMnistDataset";
constexpr char kLFWNode[] = "LFWDataset";
constexpr char kLibriTTSNode[] = "LibriTTSDataset";
constexpr char kLJSpeechNode[] = "LJSpeechDataset";
constexpr char kLSUNNode[] = "LSUNDataset";
constexpr char kManifestNode[] = "ManifestDataset";
constexpr char kMindDataNode[] = "MindDataDataset";
constexpr char kMnistNode[] = "MnistDataset";
constexpr char kMulti30kNode[] = "Multi30kDataset";
constexpr char kOmniglotNode[] = "OmniglotDataset";
constexpr char kPennTreebankNode[] = "PennTreebankDataset";
constexpr char kPhotoTourNode[] = "PhotoTourDataset";
constexpr char kPlaces365Node[] = "Places365Dataset";
constexpr char kQMnistNode[] = "QMnistDataset";
constexpr char kRandomNode[] = "RandomDataset";
constexpr char kRenderedSST2Node[] = "RenderedSST2Dataset";
constexpr char kSBUNode[] = "SBUDataset";
constexpr char kSemeionNode[] = "SemeionDataset";
constexpr char kSogouNewsNode[] = "SogouNewsDataset";
constexpr char kSpeechCommandsNode[] = "SpeechCommandsDataset";
constexpr char kSQuADNode[] = "SQuADDataset";
constexpr char kSST2Node[] = "SST2Dataset";
constexpr char kSTL10Node[] = "STL10Dataset";
constexpr char kSUN397Node[] = "SUN397Dataset";
constexpr char kTedliumNode[] = "TedliumDataset";
constexpr char kTextFileNode[] = "TextFileDataset";
constexpr char kTFRecordNode[] = "TFRecordDataset";
constexpr char kUDPOSNode[] = "UDPOSDataset";
constexpr char kUSPSNode[] = "USPSDataset";
constexpr char kVOCNode[] = "VOCDataset";
constexpr char kWIDERFaceNode[] = "WIDERFaceDataset";
constexpr char kWikiTextNode[] = "WikiTextDataset";
constexpr char kYahooAnswersNode[] = "YahooAnswersDataset";
constexpr char kYelpReviewNode[] = "YelpReviewDataset";
constexpr char kYesNoNode[] = "YesNoDataset";
141 
142 Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
143                     int32_t connector_que_size, std::shared_ptr<ShuffleOp> *shuffle_op);
144 
145 // Helper function to validate dataset files parameter
146 Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector<std::string> &dataset_files,
147                                  const std::string &file_name = "dataset file");
148 
149 // Helper function to validate dataset num_shards and shard_id parameters
150 Status ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_shards, int32_t shard_id);
151 
152 // Helper function to validate dataset sampler parameter
153 Status ValidateDatasetSampler(const std::string &dataset_name, const std::shared_ptr<SamplerObj> &sampler);
154 
155 Status ValidateStringValue(const std::string &dataset_name, const std::string &str,
156                            const std::unordered_set<std::string> &valid_strings);
157 
158 // Helper function to validate dataset input/output column parameterCD -
159 Status ValidateDatasetColumnParam(const std::string &dataset_name, const std::string &column_param,
160                                   const std::vector<std::string> &columns);
161 
162 // Helper function to validate dataset directory parameter
163 Status ValidateDatasetDirParam(const std::string &dataset_name, std::string dataset_dir);
164 
165 Status ValidateMapKey(const std::string &dataset_name, const std::string &key,
166                       const std::map<std::string, std::vector<std::string>> &map);
167 
168 Status ValidateMapValue(const std::string &dataset_name, const std::string &str,
169                         const std::vector<std::string> &valid_strings);
170 
171 /// \brief Function to create a sampler for non-mappable dataset (to be used by cache op later).
172 /// \notes Non-mappable dataset does not directly support a sampler. It has provided sampling arguments (shuffle,
173 ///     num_samples, num_shards, shard_id) and it DOES support sampling if somewhere above it in the pipeline contains
174 ///     a cache. If there is no cache above it, then the sampler is not used.
175 /// \param[in] num_samples The number of samples to be included in the dataset.
176 /// \param[in] shuffle If true, the indices are shuffled.
177 /// \param[in] num_shards Number of shards to divide the dataset into.
178 /// \param[in] shard_id Shard ID of the current shard within num_shards.
179 /// \return Shared pointer to the current Sampler.
180 std::shared_ptr<SamplerObj> SelectSampler(int64_t num_samples, bool shuffle, int32_t num_shards, int32_t shard_id);
181 
182 // The base class of all IR nodes
183 class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
184   // Allow DeepCopyPass to access internal members
185   friend class DeepCopyPass;
186 
187  public:
188   /// \brief Constructor
189   DatasetNode();
190 
191   /// \brief Constructor that initializes the cache
192   /// \param dataset_cache DatasetCache
193   explicit DatasetNode(const std::shared_ptr<DatasetCache> &dataset_cache);
194 
195   /// \brief Destructor
196   virtual ~DatasetNode() = default;
197 
198   /// \brief Node name getter
199   /// \return Name of the current node
200   virtual std::string Name() const = 0;
201 
202   /// \brief Pure virtual function to print the description
203   /// \param out - The output stream to write output to
204   virtual void Print(std::ostream &out) const = 0;
205 
206   /// \brief Pure virtual function to clone a new copy of the node
207   /// \return The new copy of the node
208   virtual std::shared_ptr<DatasetNode> Copy() = 0;
209 
210   /// \brief Print the IR tree to output stream
211   /// \param out - The output stream to write output to
212   void PrintTree(std::ostream &out) const;
213 
214   /// \brief << Stream output operator overload
215   /// \notes This allows you to write the debug print info using stream operators
216   /// \param out - reference to the output stream being overloaded
217   /// \param node - reference to the DatasetNode to display
218   /// \return - the output stream must be returned
219   friend std::ostream &operator<<(std::ostream &out, const DatasetNode &node) {
220     node.PrintTree(out);
221     return out;
222   }
223 
224   /// \brief Pure virtual function to convert a DatasetNode class into a runtime dataset object
225   /// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create
226   /// \return Status Status::OK() if build successfully
227   virtual Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) = 0;
228 
229   /// \brief base virtual function for derived class to implement parameters validation
230   /// \return Status Status::OK() if all the parameters are valid
231   virtual Status ValidateParams();
232 
233   /// \brief Pure virtual function for derived class to get the shard id of specific node
234   /// \return Status Status::OK() if get shard id successfully
235   virtual Status GetShardId(int32_t *const shard_id);
236 
237   /// \brief Gets the dataset size
238   /// \param[in] size_getter Shared pointer to DatasetSizeGetter
239   /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
240   ///     dataset size at the expense of accuracy.
241   /// \return Status - The status code return
242   virtual Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
243                                 int64_t *dataset_size);
244 
245   /// \brief Getter function for child nodes
246   /// \return Child nodes
Children()247   const std::vector<std::shared_ptr<DatasetNode>> Children() const { return children_; }
248 
249   /// \brief Get the parent dataset node.
250   /// \return The parent dataset node.
Parent()251   DatasetNode *Parent() const { return parent_; }
252 
253   /// \brief Establish a parent-child relationship between this node and the input node.
254   ///    Used during the cloning of the user-input IR tree (temporary use)
255   Status AppendChild(std::shared_ptr<DatasetNode> child);
256 
257   /// \brief Insert the input <node> above this node
258   Status InsertAbove(std::shared_ptr<DatasetNode> node);
259 
260   /// \brief Add the input node as the next sibling (future use)
261   Status InsertChildAt(int32_t pos, std::shared_ptr<DatasetNode> node);
262 
263   /// \brief detach this node from its parent, add its child (if any) to its parent
264   /// \return error code, return error if node has more than 1 children
265   Status Drop();
266 
267   /// \brief Check if this node has cache
268   /// \return True if the data of this node will be cached
IsCached()269   const bool IsCached() const { return (cache_ != nullptr); }
270 
271   /// \brief Check if this node is a leaf node.
272   /// \return True if this is a leaf node.
IsLeaf()273   const bool IsLeaf() const { return children_.empty(); }
274 
275   /// \brief Check if this node is a unary operator node.
276   /// \return True if this node is semantically a unary operator node
IsUnaryOperator()277   const bool IsUnaryOperator() const { return (mappable_ == kNotADataSource && !nary_op_); }
278 
279   /// \brief Check if this node is a n-ary operator node.
280   /// \return True if this node is semantically a n-ary operator node
IsNaryOperator()281   const bool IsNaryOperator() const { return (mappable_ == kNotADataSource && nary_op_); }
282 
283   /// \brief Check if this node is a mappable dataset. Only applicable to leaf nodes
284   /// \return True if this node is a mappable dataset
IsMappableDataSource()285   const bool IsMappableDataSource() const { return (mappable_ == kMappableSource); }
286 
287   /// \brief Check if this node is a non-mappable dataset. Only applicable to leaf nodes
288   /// \return True if this node is a non-mappable dataset
IsNonMappableDataSource()289   const bool IsNonMappableDataSource() const { return (mappable_ == kNonMappableSource); }
290 
291   /// \brief Check if this node is a data source node.
292   /// \return True if this node is a data source node
IsDataSource()293   const bool IsDataSource() const { return (mappable_ == kMappableSource || mappable_ == kNonMappableSource); }
294 
295   /// \brief Check if this node is not a data source node.
296   /// \return True if this node is not a data source node
IsNotADataSource()297   const bool IsNotADataSource() const { return (mappable_ == kNotADataSource); }
298 
299   /// \brief Check if this node is a descendant of an operator with cache.
300   /// \return True if a cache-enabled operator is an ancestor of this node
IsDescendantOfCache()301   const bool IsDescendantOfCache() const { return descendant_of_cache_; }
302 
303   /// \brief Check if this node is an orphan node
304   /// \return True if this node isn't nullptr nor does it have any children and a parent
IsOrphanNode(const std::shared_ptr<DatasetNode> & node)305   static bool IsOrphanNode(const std::shared_ptr<DatasetNode> &node) {
306     return node != nullptr && node->parent_ == nullptr && node->Children().empty();
307   }
308 
309   /// \brief Mark to indicate this node is a descendant of an operator with cache.
HasCacheAbove()310   void HasCacheAbove() { descendant_of_cache_ = true; }
311 
312   /// \brief Getter of the number of workers
NumWorkers()313   int32_t NumWorkers() const { return num_workers_; }
314 
315   /// \brief Getter of the connector queue size
ConnectorQueueSize()316   int32_t ConnectorQueueSize() const { return connector_que_size_; }
317 
318   /// \brief Getter of dataset cache
GetDatasetCache()319   std::shared_ptr<DatasetCache> GetDatasetCache() { return cache_; }
320 
321   /// \brief Setter function for runtime number of workers
322   /// \param[in] num_workers The number of threads in this operator
323   /// \return Shared pointer to the original object
324   std::shared_ptr<DatasetNode> SetNumWorkers(int32_t num_workers);
325 
326   std::shared_ptr<DatasetNode> SetConnectorQueueSize(int32_t connector_queue_size);
327 
328   /// \brief Setter function for DatasetCache
329   /// \param[in] cache Shared pointer to DatasetCache
330   /// \return Shared pointer to the original object
331   std::shared_ptr<DatasetNode> SetDatasetCache(const std::shared_ptr<DatasetCache> &cache);
332 
333   /// \brief Setter function for descendant_of_cache_
334   /// \param[in] descendant_of_cache Indicator for whether this node is a descendant of cache.
setDescendantOfCache(bool descendant_of_cache)335   void setDescendantOfCache(bool descendant_of_cache) { descendant_of_cache_ = descendant_of_cache; }
336 
337   /// \brief A helper templated function for casting "this" pointer to shared_ptr<derived>
338   ///     Similar to shared_from_this, except this one will give you the derived class as shared_ptr
339   /// \return A shared_ptr casted to the derived class
340   template <typename Derived>
shared_from_base()341   std::shared_ptr<Derived> shared_from_base() {
342     return std::static_pointer_cast<Derived>(shared_from_this());
343   }
344 
345   /// \brief Base method for IRNodePass visit. A tree walk consists of walking down the tree and also walking back up
346   ///     in a depth-first order. Accept is the node visit on the way down, whereas AcceptAfter is the node
347   ///     visit on the way back up the tree after its descendants are visited.
348   /// \notes Subclass needs to override this if it requires special node visit access.
349   ///     Check "dataset/engine/opt/pass.h" for more details.
350   /// \param[in] p The node to visit
351   /// \param[out] modified Indicator if the node was modified
352   /// \return Status of the node visit
353   virtual Status Accept(IRNodePass *const p, bool *const modified);
354 
355   /// \brief Base method for IRNodePass visit on the way back up the tree after its descendants are visited.
356   /// \notes Subclass needs to override this if it requires special node visit access.
357   ///     Check "dataset/engine/opt/pass.h" for more details.
358   /// \param[in] p The node to visit
359   /// \param[out] modified Indicator if the node was modified
360   /// \return Status of the node visit
361   virtual Status AcceptAfter(IRNodePass *const p, bool *const modified);
362 
IsSizeDefined()363   virtual bool IsSizeDefined() { return true; }
364 
365   /// \brief Get the arguments of node
366   /// \param[out] out_json JSON string of all attributes
367   /// \return Status of the function
368   virtual Status to_json(nlohmann::json *out_json);
369 
370   /// \brief Setter function, set the number of total repeats for the operator
SetTotalRepeats(int32_t total_repeats)371   void SetTotalRepeats(int32_t total_repeats) { total_repeats_ = total_repeats; }
372 
373   /// \brief Setter function, set the number of epochs for the operator
SetNumEpochs(int32_t num_epochs)374   virtual void SetNumEpochs(int32_t num_epochs) { num_epochs_ = num_epochs; }
375 
376   /// \brief Getter function
377   /// \return The number of required repeats for the operator
GetTotalRepeats()378   int32_t GetTotalRepeats() const { return total_repeats_; }
379 
380   /// \brief Getter function
381   /// \return The number of epochs for the operator
GetNumEpochs()382   int32_t GetNumEpochs() const { return num_epochs_; }
383 
384   /// \brief Getter function
385   /// \return The number of repeats per epoch for the operator
GetNumRepeatsPerEpoch()386   int32_t GetNumRepeatsPerEpoch() const { return total_repeats_ / num_epochs_; }
387 
388  protected:
389   std::vector<std::shared_ptr<DatasetNode>> children_;
390   DatasetNode *parent_;  // used to record the only one parent of an IR node after parsing phase
391   std::shared_ptr<DatasetCache> cache_;
392   int64_t dataset_size_;
393   int32_t num_workers_;
394   int32_t connector_que_size_;
395   int32_t worker_connector_size_;
396   int32_t total_repeats_;  // Number of times required to run this operator
397   int32_t num_epochs_;     // Number of epochs
398   // Establish a parent-child relationship between this node and the input node.
399   // Used only in the constructor of the class and its derived classes.
400   void AddChild(std::shared_ptr<DatasetNode> child);
401   std::string PrintColumns(const std::vector<std::string> &columns) const;
402   void PrintNode(std::ostream &out, int *level) const;
403   enum DataSource { kNotADataSource = 0, kNonMappableSource = 1, kMappableSource = 2 };
404   enum DataSource mappable_;
405   bool nary_op_;  // an indicator of whether the current node supports multiple children, true for concat/zip node
406   bool descendant_of_cache_;  // an indicator of whether the current node is a descendant of cache.
407                               // Initially set to false, will set to true by the optimizer when conditions are met.
408 };
409 
410 // MappableSourceNode represents the leaf nodes that can be randomly accessed with indexes.
411 class MappableSourceNode : public DatasetNode {
412  public:
413   /// \brief Constructor
MappableSourceNode()414   MappableSourceNode() : DatasetNode() { mappable_ = kMappableSource; }
415 
416   /// \brief Constructor that initializes the cache
417   /// \param dataset_cache DatasetCache
MappableSourceNode(const std::shared_ptr<DatasetCache> & dataset_cache)418   explicit MappableSourceNode(const std::shared_ptr<DatasetCache> &dataset_cache) : DatasetNode(dataset_cache) {
419     mappable_ = kMappableSource;
420     // Initially set to false, and set to true by the optimizer when conditions are met.
421     descendant_of_cache_ = false;
422   }
423 
424   Status Accept(IRNodePass *const p, bool *const modified) override;
425 
426   /// \brief Destructor
427   virtual ~MappableSourceNode() = default;
428 
429   /// \brief Sampler getter
430   /// \return SamplerObj of the current node
431   virtual std::shared_ptr<SamplerObj> Sampler() = 0;
432 
433   /// \brief Sampler setter
434   virtual void SetSampler(std::shared_ptr<SamplerObj> sampler) = 0;
435 };
436 
437 // NonMappableSourceNode represents the leaf nodes that can not be randomly accessed.
438 class NonMappableSourceNode : public DatasetNode {
439  public:
440   /// \brief Constructor
NonMappableSourceNode()441   NonMappableSourceNode() : DatasetNode() { mappable_ = kNonMappableSource; }
442 
443   /// \brief Constructor that initializes the cache
444   /// \param dataset_cache DatasetCache
NonMappableSourceNode(const std::shared_ptr<DatasetCache> & dataset_cache)445   explicit NonMappableSourceNode(const std::shared_ptr<DatasetCache> &dataset_cache) : DatasetNode(dataset_cache) {
446     mappable_ = kNonMappableSource;
447     // Initially set to false, and set to true by the optimizer when conditions are met.
448     descendant_of_cache_ = false;
449   }
450 
451   Status Accept(IRNodePass *const p, bool *const modified) override;
452 
453   /// \brief Destructor
454   virtual ~NonMappableSourceNode() = default;
455 
456   /// \brief By default non-mappable dataset does not support sampling. However, if a cache operator
457   ///     is injected at some other place higher in the tree, that cache can inherit this sampler
458   ///     from the leaf, providing sampling support from the caching layer.
459   ///     This function sets up the sampler for a leaf node that does not use sampling.
460   /// \param[in] sampler The sampler to setup
461   /// \return Status of the function
462   virtual Status SetupSamplerForCache(std::shared_ptr<SamplerObj> *sampler) = 0;
463 
464   /// \brief If a cache has been added into the ascendant tree over this non-mappable source node, then the cache will
465   ///     be executing a sampler for fetching the data. As such, any options in the source node need to be reset to its
466   ///     defaults so that this source node will produce the full set of data into the cache.
467   /// \return Status of the function
468   virtual Status MakeSimpleProducer() = 0;
469 
SetSkipSteps(int64_t skip_steps)470   void SetSkipSteps(int64_t skip_steps) { skip_steps_ = skip_steps; }
471 
472  protected:
473   int64_t skip_steps_ = 0;
474 };
475 }  // namespace dataset
476 }  // namespace mindspore
477 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_DATASET_NODE_H_
478