• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_DISTRIBUTED_SAMPLE_H_
18 #define MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_DISTRIBUTED_SAMPLE_H_
19 
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #include "minddata/mindrecord/include/shard_operator.h"
25 #include "minddata/mindrecord/include/shard_shuffle.h"
26 #include "minddata/mindrecord/include/shard_sample.h"
27 
28 namespace mindspore {
29 namespace mindrecord {
30 class MINDRECORD_API ShardDistributedSample : public ShardSample {
31  public:
32   ShardDistributedSample(int num_shards, int shard_id, int64_t no_of_padded_samples, bool shuffle, uint32_t seed,
33                          int64_t no_of_samples = 0, int64_t offset = -1);
34 
35   ShardDistributedSample(int num_shards, int shard_id, bool shuffle, uint32_t seed, int64_t no_of_samples = 0,
36                          int64_t offset = -1);
37 
SetNumPaddedSamples(int64_t no_of_padded_samples)38   void SetNumPaddedSamples(int64_t no_of_padded_samples) { no_of_padded_samples_ = no_of_padded_samples; }
39 
~ShardDistributedSample()40   ~ShardDistributedSample() override{};
41 
42   Status PreExecute(ShardTaskList &tasks) override;
43 
44   int64_t GetNumSamples(int64_t dataset_size, int64_t num_classes) override;
45 
46  private:
47   bool shuffle_;
48   int64_t no_of_padded_samples_;
49   bool first_epoch_;    // check (num_sample + num_padded) % num_shards == 0 in first epoch
50   ShardTaskList task_;  // maintain the input tasks in first epoch
51 };
52 }  // namespace mindrecord
53 }  // namespace mindspore
54 
55 #endif  // MINDSPORE_CCSRC_MINDDATA_MINDRECORD_INCLUDE_SHARD_DISTRIBUTED_SAMPLE_H_
56