1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_TEXT_FILE_NODE_H_ 18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_TEXT_FILE_NODE_H_ 19 20 #include <memory> 21 #include <string> 22 #include <vector> 23 24 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h" 25 26 namespace mindspore { 27 namespace dataset { 28 29 /// \class TextFileNode 30 /// \brief A Dataset derived class to represent TextFile dataset 31 class TextFileNode : public NonMappableSourceNode { 32 public: 33 /// \brief Constructor 34 TextFileNode(std::vector<std::string> dataset_files, int32_t num_samples, ShuffleMode shuffle, int32_t num_shards, 35 int32_t shard_id, std::shared_ptr<DatasetCache> cache); 36 37 /// \brief Destructor 38 ~TextFileNode() = default; 39 40 /// \brief Node name getter 41 /// \return Name of the current node Name()42 std::string Name() const override { return kTextFileNode; } 43 44 /// \brief Print the description 45 /// \param out - The output stream to write output to 46 void Print(std::ostream &out) const override; 47 48 /// \brief Copy the node to a new object 49 /// \return A shared pointer to the new copy 50 std::shared_ptr<DatasetNode> Copy() override; 51 52 /// \brief a base class override function to create the required runtime dataset op objects for this class 53 /// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create 54 /// \return Status Status::OK() if build successfully 55 Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override; 56 57 /// \brief Parameters validation 58 /// \return Status Status::OK() if all the parameters are valid 59 Status ValidateParams() override; 60 61 /// \brief Get the shard id of node 62 /// \return Status Status::OK() if get shard id successfully 63 Status GetShardId(int32_t *shard_id) override; 64 65 /// \brief Base-class override for GetDatasetSize 66 /// \param[in] size_getter Shared pointer to DatasetSizeGetter 67 /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting 68 /// dataset size at the expense of accuracy. 69 /// \param[out] dataset_size the size of the dataset 70 /// \return Status of the function 71 Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate, 72 int64_t *dataset_size) override; 73 74 /// \brief Getter functions DatasetFiles()75 const std::vector<std::string> &DatasetFiles() const { return dataset_files_; } NumSamples()76 int32_t NumSamples() const { return num_samples_; } NumShards()77 int32_t NumShards() const { return num_shards_; } ShardId()78 int32_t ShardId() const { return shard_id_; } Shuffle()79 ShuffleMode Shuffle() const { return shuffle_; } 80 81 /// \brief Get the arguments of node 82 /// \param[out] out_json JSON string of all attributes 83 /// \return Status of the function 84 Status to_json(nlohmann::json *out_json) override; 85 86 /// \brief Function to read dataset in json 87 /// \param[in] json_obj The JSON object to be deserialized 88 /// \param[out] ds Deserialized dataset 89 /// \return Status The status code returned 90 static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds); 91 92 /// \brief TextFile by itself is a non-mappable dataset that does not support sampling. 93 /// However, if a cache operator is injected at some other place higher in the tree, that cache can 94 /// inherit this sampler from the leaf, providing sampling support from the caching layer. 95 /// That is why we setup the sampler for a leaf node that does not use sampling. 96 /// Note: This function is common among NonMappableSourceNode and should be promoted to its parent class. 97 /// \param[in] sampler The sampler to setup 98 /// \return Status of the function 99 Status SetupSamplerForCache(std::shared_ptr<SamplerObj> *sampler) override; 100 101 /// \brief If a cache has been added into the ascendant tree over this TextFile node, then the cache will be executing 102 /// a sampler for fetching the data. As such, any options in the TextFile node need to be reset to its defaults 103 /// so that this TextFile node will produce the full set of data into the cache. 104 /// Note: This function is common among NonMappableSourceNode and should be promoted to its parent class. 105 /// \return Status of the function 106 Status MakeSimpleProducer() override; 107 108 private: 109 std::vector<std::string> dataset_files_; 110 int32_t num_samples_; 111 int32_t num_shards_; 112 int32_t shard_id_; 113 ShuffleMode shuffle_; 114 }; 115 116 } // namespace dataset 117 } // namespace mindspore 118 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_TEXT_FILE_NODE_H_ 119