/third_party/mindspore/tests/ut/python/dataset/ |
D | test_datasets_get_dataset_size.py | 43 ds_shard_1_0 = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR, num_shards=1, shard_id=0) 46 ds_shard_2_0 = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR, num_shards=2, shard_id=0) 49 ds_shard_3_0 = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR, num_shards=3, shard_id=0) 57 …ds_shard_1_0 = ds.TFRecordDataset(IMAGENET_TFFILE_DIR, num_shards=1, shard_id=0, shard_equal_rows=… 60 …ds_shard_2_0 = ds.TFRecordDataset(IMAGENET_TFFILE_DIR, num_shards=2, shard_id=0, shard_equal_rows=… 63 …ds_shard_3_0 = ds.TFRecordDataset(IMAGENET_TFFILE_DIR, num_shards=3, shard_id=0, shard_equal_rows=… 72 ds_shard_4_0 = ds.TFRecordDataset(IMAGENET_TFFILE_DIR, num_shards=4, shard_id=0) 91 ds_shard_1_0 = ds.MnistDataset(MNIST_DATA_DIR, num_shards=1, shard_id=0) 94 ds_shard_2_0 = ds.MnistDataset(MNIST_DATA_DIR, num_shards=2, shard_id=0) 97 ds_shard_3_0 = ds.MnistDataset(MNIST_DATA_DIR, num_shards=3, shard_id=0) [all …]
|
D | test_datasets_sharding.py | 22 def sharding_config(num_shards, shard_id, num_samples, shuffle, class_index, repeat_cnt=1): argument 24 shard_id=shard_id, 57 def sharding_config(num_shards, shard_id, num_samples, repeat_cnt=1): argument 58 …data1 = ds.TFRecordDataset(tf_files, num_shards=num_shards, shard_id=shard_id, num_samples=num_sam… 95 def sharding_config(num_shards, shard_id, num_samples, repeat_cnt=1): argument 96 …data1 = ds.TFRecordDataset(tf_files, num_shards=num_shards, shard_id=shard_id, num_samples=num_sam… 139 def sharding_config(num_shards, shard_id, num_samples, shuffle, repeat_cnt=1): argument 140 …s.ManifestDataset(manifest_file, num_samples=num_samples, num_shards=num_shards, shard_id=shard_id, 164 def sharding_config(num_shards, shard_id, num_samples, shuffle, repeat_cnt=1): argument 165 … sampler = ds.DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples) [all …]
|
D | test_paddeddataset.py | 58 …testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=N… 81 …distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_s… 105 testsampler = ds.DistributedSampler(num_shards=2, shard_id=0, shuffle=False, num_samples=None) 126 testsampler = ds.DistributedSampler(num_shards=2, shard_id=0, shuffle=False, num_samples=None) 143 …testsampler = ds.DistributedSampler(num_shards=4, shard_id=0, shuffle=False, num_samples=None, off… 151 …testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=No… 175 …distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_s… 198 … testsampler = ds.DistributedSampler(num_shards=2, shard_id=0, shuffle=False, num_samples=None) 208 … testsampler = ds.DistributedSampler(num_shards=2, shard_id=0, shuffle=True, num_samples=None) 213 testsampler = ds.DistributedSampler(num_shards=2, shard_id=0, shuffle=False, num_samples=5) [all …]
|
D | test_datasets_usps.py | 175 ds.USPSDataset(DATA_DIR, "train", shard_id=0) 176 ds.USPSDataset(DATA_DIR, "test", shard_id=0) 180 ds.USPSDataset(DATA_DIR, "train", num_shards=5, shard_id=-1) 181 ds.USPSDataset(DATA_DIR, "test", num_shards=5, shard_id=-1) 183 ds.USPSDataset(DATA_DIR, "train", num_shards=5, shard_id=5) 184 ds.USPSDataset(DATA_DIR, "test", num_shards=5, shard_id=5) 186 ds.USPSDataset(DATA_DIR, "train", num_shards=2, shard_id=5) 187 ds.USPSDataset(DATA_DIR, "test", num_shards=2, shard_id=5) 202 ds.USPSDataset(DATA_DIR, "train", num_shards=2, shard_id="0") 203 ds.USPSDataset(DATA_DIR, "test", num_shards=2, shard_id="0")
|
D | test_datasets_sbd.py | 159 ds.SBDataset(DATASET_DIR, task='Segmentation', usage='train', num_shards=2, shard_id=0, 168 ds.SBDataset(DATASET_DIR, task='Segmentation', usage='train', shard_id=0) 172 ds.SBDataset(DATASET_DIR, task='Segmentation', usage='train', num_shards=5, shard_id=-1) 174 ds.SBDataset(DATASET_DIR, task='Segmentation', usage='train', num_shards=5, shard_id=5) 176 ds.SBDataset(DATASET_DIR, task='Segmentation', usage='train', num_shards=2, shard_id=5) 188 ds.SBDataset(DATASET_DIR, task='Segmentation', usage='train', num_shards=2, shard_id="0")
|
D | test_minddataset.py | 257 num_shards=num_shards, shard_id=partition_id) 280 shard_id=partition_id, num_samples=1) 305 shard_id=partition_id, num_samples=2) 330 shard_id=partition_id, num_samples=3) 350 …data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, num_shards=1, shard_id=0,… 372 num_shards=num_shards, shard_id=partition_id) 411 num_shards=num_shards, shard_id=partition_id) 548 num_shards=4, shard_id=3) 611 …MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, shuffle=False, num_shards=5, shard_id=1) 2246 shard_id=3) [all …]
|
D | test_sampler_chain.py | 153 … sampler = ds.DistributedSampler(num_shards=1, shard_id=0, shuffle=False, num_samples=3, offset=1) 180 …child_sampler = ds.DistributedSampler(num_shards=1, shard_id=0, shuffle=False, num_samples=3, offs… 205 sampler = ds.DistributedSampler(num_shards=2, shard_id=0, shuffle=False, num_samples=5) 233 sampler = ds.DistributedSampler(num_shards=2, shard_id=0, shuffle=False, num_samples=5) 261 sampler = ds.DistributedSampler(num_shards=2, shard_id=0, shuffle=False, num_samples=5) 362 sampler = ds.DistributedSampler(num_shards=1, shard_id=0, shuffle=False, num_samples=5) 391 sampler = ds.DistributedSampler(num_shards=1, shard_id=0, shuffle=False, num_samples=5)
|
/third_party/mindspore/mindspore/ccsrc/minddata/mindrecord/io/ |
D | shard_reader.cc | 301 for (int shard_id = 0; shard_id < shard_count; ++shard_id) { in ReadRowGroupSummary() local 303 auto last_page_id = shard_header_->GetLastPageId(shard_id); in ReadRowGroupSummary() 309 (void)shard_header_->GetPage(shard_id, page_id, &page_ptr); in ReadRowGroupSummary() 319 … row_group_summary.emplace_back(shard_id, page_ptr->GetPageTypeID(), start_row_id, number_of_rows); in ReadRowGroupSummary() 331 int shard_id, const std::vector<std::string> &columns, in ConvertLabelToJson() argument 338 (*offset_ptr)[shard_id].emplace_back( in ConvertLabelToJson() 339 std::vector<uint64_t>{static_cast<uint64_t>(shard_id), group_id, offset_start, offset_end}); in ConvertLabelToJson() 367 (*col_val_ptr)[shard_id].emplace_back(tmp); in ConvertLabelToJson() 387 (*col_val_ptr)[shard_id].emplace_back(construct_json); in ConvertLabelToJson() 407 Status ShardReader::ReadAllRowsInShard(int shard_id, const std::string &sql, const std::vector<std:… in ReadAllRowsInShard() argument [all …]
|
D | shard_writer.cc | 689 Status ShardWriter::WriteByShard(int shard_id, int start_row, int end_row, in WriteByShard() argument 692 MS_LOG(DEBUG) << "Shard: " << shard_id << ", start: " << start_row << ", end: " << end_row in WriteByShard() 700 SetLastRawPage(shard_id, last_raw_page); in WriteByShard() 701 SetLastBlobPage(shard_id, last_blob_page); in WriteByShard() 704 RETURN_IF_NOT_OK(AppendBlobPage(shard_id, blob_data, rows_in_group, last_blob_page)); in WriteByShard() 705 RETURN_IF_NOT_OK(NewBlobPage(shard_id, blob_data, rows_in_group, last_blob_page)); in WriteByShard() 706 RETURN_IF_NOT_OK(ShiftRawPage(shard_id, rows_in_group, last_raw_page)); in WriteByShard() 707 RETURN_IF_NOT_OK(WriteRawPage(shard_id, rows_in_group, last_raw_page, bin_raw_data)); in WriteByShard() 750 Status ShardWriter::AppendBlobPage(const int &shard_id, const std::vector<std::vector<uint8_t>> &bl… in AppendBlobPage() argument 760 …auto &io_seekp = file_streams_[shard_id]->seekp(page_size_ * page_id + header_size_ + bytes_page, … in AppendBlobPage() [all …]
|
D | shard_segment.cc | 174 auto shard_id = std::get<0>(rg); in ReadAtPageById() local 178 group_id, shard_id, in ReadAtPageById() 191 RETURN_IF_NOT_OK(PackImages(group_id, shard_id, offsets[i], &images_ptr)); in ReadAtPageById() 200 Status ShardSegment::PackImages(int group_id, int shard_id, std::vector<uint64_t> offset, in PackImages() argument 204 RETURN_IF_NOT_OK(shard_header_->GetPageByGroupId(group_id, shard_id, &page_ptr)); in PackImages() 209 auto &io_seekg = file_streams_random_[0][shard_id]->seekg(file_offset, std::ios::beg); in PackImages() 211 file_streams_random_[0][shard_id]->close(); in PackImages() 216 …file_streams_random_[0][shard_id]->read(reinterpret_cast<char *>(&((*(*images_ptr))[0])), offset[1… in PackImages() 218 file_streams_random_[0][shard_id]->close(); in PackImages() 264 auto shard_id = std::get<0>(rg); in ReadAllAtPageById() local [all …]
|
/third_party/mindspore/mindspore/ccsrc/minddata/mindrecord/include/ |
D | shard_task_list.h | 54 …inline void InsertTask(TaskType task_type, int shard_id, int group_id, const std::vector<uint64_t>… 57 inline void InsertTask(const uint32_t &i, TaskType task_type, int shard_id, int group_id, 104 inline void ShardTaskList::InsertTask(TaskType task_type, int shard_id, int group_id, in InsertTask() argument 106 …MS_LOG(DEBUG) << "Insert task into task list, shard_id: " << shard_id << ", group_id: " << group_id in InsertTask() 108 task_list_.emplace_back(task_type, std::make_tuple(shard_id, group_id), offset, label); in InsertTask() 111 inline void ShardTaskList::InsertTask(const uint32_t &i, TaskType task_type, int shard_id, int grou… in InsertTask() argument 113 …MS_LOG(DEBUG) << "Insert task into task list, shard_id: " << shard_id << ", group_id: " << group_id in InsertTask() 115 task_list_[i] = {task_type, std::make_tuple(shard_id, group_id), offset, label}; in InsertTask()
|
D | shard_reader.h | 151 Status ReadRowGroupBrief(int group_id, int shard_id, const std::vector<std::string> &columns, 165 …Status ReadRowGroupCriteria(int group_id, int shard_id, const std::pair<std::string, std::string> … 208 … std::shared_ptr<std::vector<std::vector<std::vector<uint64_t>>>> offset_ptr, int shard_id, 216 …ReadRowGroupByShardIDAndSampleID(const std::vector<std::string> &columns, const uint32_t &shard_id, 220 …Status ReadAllRowsInShard(int shard_id, const std::string &sql, const std::vector<std::string> &co… 234 std::vector<std::vector<uint64_t>> GetImageOffset(int group_id, int shard_id, 238 Status GetPagesByCategory(int shard_id, const std::pair<std::string, std::string> &criteria, 247 Status GetLabels(int page_id, int shard_id, const std::vector<std::string> &columns, 251 Status GetLabelsFromPage(int page_id, int shard_id, const std::vector<std::string> &columns, 280 Status GetLabelsFromBinaryFile(int shard_id, const std::vector<std::string> &columns, [all …]
|
D | shard_writer.h | 140 …Status WriteByShard(int shard_id, int start_row, int end_row, const std::vector<std::vector<uint8_… 149 Status AppendBlobPage(const int &shard_id, const std::vector<std::vector<uint8_t>> &blob_data, 154 Status NewBlobPage(const int &shard_id, const std::vector<std::vector<uint8_t>> &blob_data, 159 Status ShiftRawPage(const int &shard_id, const std::vector<std::pair<int, int>> &rows_in_group, 163 Status WriteRawPage(const int &shard_id, const std::vector<std::pair<int, int>> &rows_in_group, 167 Status EmptyRawPage(const int &shard_id, std::shared_ptr<Page> &last_raw_page); 170 …Status AppendRawPage(const int &shard_id, const std::vector<std::pair<int, int>> &rows_in_group, c… 192 Status SetLastRawPage(const int &shard_id, std::shared_ptr<Page> &last_raw_page); 195 Status SetLastBlobPage(const int &shard_id, std::shared_ptr<Page> &last_blob_page);
|
D | shard_header.h | 90 std::string GetShardAddressByID(int64_t shard_id); 102 Status GetPage(const int &shard_id, const int &page_id, std::shared_ptr<Page> *page_ptr); 108 int64_t GetLastPageId(const int &shard_id); 110 int GetLastPageIdByType(const int &shard_id, const std::string &page_type); 112 …Status GetPageByGroupId(const int &group_id, const int &shard_id, std::shared_ptr<Page> *page_ptr);
|
/third_party/mindspore/mindspore/ccsrc/minddata/mindrecord/meta/ |
D | shard_header.cc | 237 int shard_id = page["shard_id"]; in ParsePage() local 249 …std::shared_ptr<Page> parsed_page = std::make_shared<Page>(page_id, shard_id, page_type, page_type… in ParsePage() 252 pages_[shard_id].push_back(std::move(parsed_page)); in ParsePage() 369 Status ShardHeader::GetPage(const int &shard_id, const int &page_id, std::shared_ptr<Page> *page_pt… in GetPage() argument 371 …if (shard_id < static_cast<int>(pages_.size()) && page_id < static_cast<int>(pages_[shard_id].size… in GetPage() 372 *page_ptr = pages_[shard_id][page_id]; in GetPage() 380 int shard_id = new_page->GetShardID(); in SetPage() local 382 …if (shard_id < static_cast<int>(pages_.size()) && page_id < static_cast<int>(pages_[shard_id].size… in SetPage() 383 pages_[shard_id][page_id] = new_page; in SetPage() 390 int shard_id = new_page->GetShardID(); in AddPage() local [all …]
|
D | shard_distributed_sample.cc | 25 ShardDistributedSample::ShardDistributedSample(int num_shards, int shard_id, int no_of_padded_sampl… in ShardDistributedSample() argument 27 : ShardSample(1, num_shards, shard_id, no_of_samples, offset), in ShardDistributedSample() 34 ShardDistributedSample::ShardDistributedSample(int num_shards, int shard_id, bool shuffle, uint32_t… in ShardDistributedSample() argument 36 : ShardDistributedSample(num_shards, shard_id, 0, shuffle, seed, no_of_samples, offset) {} in ShardDistributedSample()
|
/third_party/mindspore/mindspore/dataset/engine/ |
D | datasets.py | 1881 …__(self, num_parallel_workers=None, num_samples=None, shuffle=True, num_shards=None, shard_id=None, argument 1886 self.shard_id = replace_none(shard_id, 0) 1959 shard_id=None, cache=None): argument 1961 num_shards=num_shards, shard_id=shard_id, cache=cache) 1963 self.sampler = samplers.select_sampler(num_samples, sampler, shuffle, num_shards, shard_id) 3381 … extensions=None, class_indexing=None, decode=False, num_shards=None, shard_id=None, cache=None): argument 3383 shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) 3503 sampler=None, num_shards=None, shard_id=None, cache=None): argument 3505 shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) 3604 … shard_id=None, sampler=None, padded_sample=None, num_padded=None, num_samples=None, cache=None): argument [all …]
|
D | samplers.py | 29 def select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id): argument 54 (any(arg is not None for arg in [num_shards, shard_id, shuffle, num_samples]))): 57 ' shard_id: {}, shuffle: {}.'.format(num_samples, num_shards, shard_id, shuffle)) 72 … return DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples) 80 … return DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples) 87 return DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples) 354 def __init__(self, num_shards, shard_id, shuffle=True, num_samples=None, offset=-1): argument 358 if not isinstance(shard_id, int): 359 raise TypeError("shard_id must be integer but was: {}.".format(shard_id)) 375 self.shard_id = shard_id [all …]
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/ |
D | tf_record_node.h | 40 … int32_t num_shards, int32_t shard_id, bool shard_equal_rows, std::shared_ptr<DatasetCache> cache) in TFRecordNode() argument 48 shard_id_(shard_id), in TFRecordNode() 61 … int32_t num_shards, int32_t shard_id, bool shard_equal_rows, std::shared_ptr<DatasetCache> cache) in TFRecordNode() argument 69 shard_id_(shard_id), in TFRecordNode() 98 Status GetShardId(int32_t *const shard_id) override;
|
D | text_file_node.cc | 27 … int32_t num_shards, int32_t shard_id, std::shared_ptr<DatasetCache> cache) in TextFileNode() argument 33 shard_id_(shard_id) { in TextFileNode() 118 Status TextFileNode::GetShardId(int32_t *shard_id) { in GetShardId() argument 119 *shard_id = shard_id_; in GetShardId() 168 int32_t shard_id = json_obj["shard_id"]; in from_json() local 171 …*ds = std::make_shared<TextFileNode>(dataset_files, num_samples, shuffle, num_shards, shard_id, ca… in from_json()
|
D | csv_node.cc | 29 int32_t num_shards, int32_t shard_id, std::shared_ptr<DatasetCache> cache) in CSVNode() argument 38 shard_id_(shard_id) { in CSVNode() 149 Status CSVNode::GetShardId(int32_t *shard_id) { in GetShardId() argument 150 *shard_id = shard_id_; in GetShardId() 207 int32_t shard_id = json_obj["shard_id"]; in from_json() local 211 shuffle, num_shards, shard_id, cache); in from_json()
|
D | usps_node.cc | 31 int32_t num_shards, int32_t shard_id, std::shared_ptr<DatasetCache> cache) in USPSNode() argument 38 shard_id_(shard_id) { in USPSNode() 111 Status USPSNode::GetShardId(int32_t *shard_id) { in GetShardId() argument 112 *shard_id = shard_id_; in GetShardId()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/ |
D | bindings.cc | 108 int32_t shuffle, int32_t num_shards, int32_t shard_id) { in __anon810239690902() argument 111 … toShuffleMode(shuffle), num_shards, shard_id, nullptr); in __anon810239690902() 133 int32_t num_shards, int32_t shard_id) { in __anon810239690d02() argument 136 … num_samples, toShuffleMode(shuffle), num_shards, shard_id, nullptr); in __anon810239690d02() 283 int32_t shard_id) { in __anon810239692402() argument 286 … toShuffleMode(shuffle), num_shards, shard_id, nullptr); in __anon810239692402() 297 int32_t num_shards, int32_t shard_id, bool shard_equal_rows) { in __anon810239692602() argument 300 toShuffleMode(shuffle), num_shards, shard_id, shard_equal_rows, nullptr); in __anon810239692602() 305 … int64_t num_samples, int32_t shuffle, int32_t num_shards, int32_t shard_id, in __anon810239692602() 309 toShuffleMode(shuffle), num_shards, shard_id, shard_equal_rows, nullptr); in __anon810239692602() [all …]
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/ |
D | dataset_node.cc | 126 …ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_shards, int32_t shard_id) { in ValidateDatasetShardParams() argument 133 if (shard_id < 0 || shard_id >= num_shards) { in ValidateDatasetShardParams() 135 std::string err_msg = dataset_name + ": Invalid input, shard_id: " + std::to_string(shard_id) + in ValidateDatasetShardParams() 196 …amplerObj> SelectSampler(int64_t num_samples, bool shuffle, int32_t num_shards, int32_t shard_id) { in SelectSampler() argument 200 return DistributedSampler(num_shards, shard_id, shuffle, num_samples).Parse(); in SelectSampler() 207 return DistributedSampler(num_shards, shard_id, shuffle, num_samples).Parse(); in SelectSampler() 599 Status DatasetNode::GetShardId(int32_t *const shard_id) { in GetShardId() argument 602 return children_[0]->GetShardId(shard_id); in GetShardId() 607 return children_.back()->GetShardId(shard_id); in GetShardId()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/ |
D | distributed_sampler_ir.cc | 34 DistributedSamplerObj::DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, in… in DistributedSamplerObj() argument 37 shard_id_(shard_id), in DistributedSamplerObj() 120 int64_t shard_id = json_obj["shard_id"]; in from_json() local 126 …std::make_shared<DistributedSamplerObj>(num_shards, shard_id, shuffle, num_samples, seed, offset, … in from_json()
|