• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_SAMPLE_H_
18 #define MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_SAMPLE_H_
19 
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #include "minddata/mindrecord/include/shard_operator.h"
25 #include "minddata/mindrecord/include/shard_shuffle.h"
26 
27 namespace mindspore {
28 namespace mindrecord {
29 class MINDRECORD_API ShardSample : public ShardOperator {
30  public:
31   explicit ShardSample(int64_t n);
32 
33   ShardSample(int64_t num, int64_t den);
34 
35   ShardSample(int64_t num, int64_t den, int64_t par, int64_t no_of_samples = 0, int64_t offset = -1);
36 
37   explicit ShardSample(const std::vector<int64_t> &indices);
38 
39   ShardSample(const std::vector<int64_t> &indices, uint32_t seed);
40 
~ShardSample()41   ~ShardSample() override{};
42 
43   Status Execute(ShardTaskList &tasks) override;
44 
45   Status UpdateTasks(ShardTaskList &tasks, int64_t taking);  // NOLINT
46 
47   Status SufExecute(ShardTaskList &tasks) override;
48 
49   int64_t GetNumSamples(int64_t dataset_size, int64_t num_classes) override;
50 
51  private:
52   // Update the partition_shard_sample_count_ in tasks
53   Status UpdatePartitionWhenSlowMode(ShardTaskList &tasks);  // NOLINT
54 
55  protected:
56   int64_t numerator_;
57   int64_t denominator_;
58   int64_t partition_id_;
59   int64_t no_of_samples_;
60   std::shared_ptr<ShardShuffle> shuffle_op_;
61   std::vector<int64_t> nums_per_shard_;
62 
63  private:
64   std::vector<int64_t> indices_;
65   SamplerType sampler_type_;
66   int64_t offset_;
67 };
68 }  // namespace mindrecord
69 }  // namespace mindspore
70 
71 #endif  // MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_SAMPLE_H_
72