1 /** 2 * Copyright 2019-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_SAMPLE_H_ 18 #define MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_SAMPLE_H_ 19 20 #include <memory> 21 #include <string> 22 #include <utility> 23 #include <vector> 24 #include "minddata/mindrecord/include/shard_operator.h" 25 #include "minddata/mindrecord/include/shard_shuffle.h" 26 27 namespace mindspore { 28 namespace mindrecord { 29 class MINDRECORD_API ShardSample : public ShardOperator { 30 public: 31 explicit ShardSample(int64_t n); 32 33 ShardSample(int64_t num, int64_t den); 34 35 ShardSample(int64_t num, int64_t den, int64_t par, int64_t no_of_samples = 0, int64_t offset = -1); 36 37 explicit ShardSample(const std::vector<int64_t> &indices); 38 39 ShardSample(const std::vector<int64_t> &indices, uint32_t seed); 40 ~ShardSample()41 ~ShardSample() override{}; 42 43 Status Execute(ShardTaskList &tasks) override; 44 45 Status UpdateTasks(ShardTaskList &tasks, int64_t taking); // NOLINT 46 47 Status SufExecute(ShardTaskList &tasks) override; 48 49 int64_t GetNumSamples(int64_t dataset_size, int64_t num_classes) override; 50 51 private: 52 // Update the partition_shard_sample_count_ in tasks 53 Status UpdatePartitionWhenSlowMode(ShardTaskList &tasks); // NOLINT 54 55 protected: 56 int64_t numerator_; 57 int64_t denominator_; 58 int64_t partition_id_; 59 int64_t no_of_samples_; 60 std::shared_ptr<ShardShuffle> shuffle_op_; 61 std::vector<int64_t> nums_per_shard_; 62 63 private: 64 std::vector<int64_t> indices_; 65 SamplerType sampler_type_; 66 int64_t offset_; 67 }; 68 } // namespace mindrecord 69 } // namespace mindspore 70 71 #endif // MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_SAMPLE_H_ 72