1 /** 2 * Copyright 2019-2022 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SEQUENTIAL_SAMPLER_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SEQUENTIAL_SAMPLER_H_ 18 19 #include <limits> 20 #include <memory> 21 22 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" 23 24 namespace mindspore { 25 namespace dataset { 26 class SequentialSamplerRT : public SamplerRT { 27 public: 28 // Constructor 29 // @param start_index - The starting index value 30 // @param num_samples - The number of samples to draw. A value of 0 indicates the sampler should produce the 31 // full amount of ids from the dataset 32 // @param int64_t samples_per_tensor - Num of Sampler Ids to fetch via 1 GetNextSample call 33 SequentialSamplerRT(int64_t start_index, int64_t num_samples, 34 int64_t samples_per_tensor = std::numeric_limits<int64_t>::max()); 35 36 // Destructor. 37 ~SequentialSamplerRT() = default; 38 39 // init sampler, called by python 40 Status InitSampler() override; 41 42 /// \brief Reset for next epoch. 43 /// \param[in] failover_reset A boolean to show whether we are resetting the pipeline 44 /// \return Status The status code returned 45 Status ResetSampler(const bool failover_reset) override; 46 47 // Op calls this to get next Sample that contains all the sampleIds 48 // @param TensorRow to be returned to corresponding Dataset Op 49 // @param int32_t workerId - not meant to be used 50 // @return Status The status code returned 51 Status GetNextSample(TensorRow *out) override; 52 53 /// \brief Recursively calls this function on its children to get the actual number of samples on a tree of samplers 54 /// \note This is not a getter for num_samples_. For example, if num_samples_ is 0 or if it's smaller than num_rows, 55 /// then num_samples_ is not returned at all. 56 /// \param[in] num_rows The total number of rows in the dataset 57 /// \return int64_t Calculated number of samples 58 int64_t CalculateNumSamples(int64_t num_rows) override; 59 60 // Printer for debugging purposes. 61 // @param out - output stream to write to 62 // @param show_all - bool to show detailed vs summary 63 void SamplerPrint(std::ostream &out, bool show_all) const override; 64 65 /// \brief Get the arguments of node 66 /// \param[out] out_json JSON string of all attributes 67 /// \return Status of the function 68 Status to_json(nlohmann::json *out_json) override; 69 70 protected: 71 int64_t current_index_; // The id sequencer. Each new id increments from this 72 int64_t start_index_; // The starting id. current_id_ begins from here. 73 int64_t index_produced_; // An internal counter that tracks how many ids have been produced 74 }; 75 } // namespace dataset 76 } // namespace mindspore 77 78 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SEQUENTIAL_SAMPLER_H_ 79