1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "minddata/dataset/engine/datasetops/parallel_op.h"
17
18 #include <iostream>
19 #include "minddata/dataset/engine/datasetops/dataset_op.h"
20 #include "minddata/dataset/engine/execution_tree.h"
21 #include "minddata/dataset/engine/db_connector.h"
22 #include "minddata/dataset/util/task_manager.h"
23
24 namespace mindspore {
25 namespace dataset {
26 // Constructor
ParallelOp(int32_t num_workers,int32_t op_connector_size,std::shared_ptr<SamplerRT> sampler)27 ParallelOp::ParallelOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<SamplerRT> sampler)
28 : DatasetOp(op_connector_size, sampler),
29 num_workers_(num_workers),
30 num_producers_(num_workers),
31 worker_connector_size_(1),
32 worker_connector_(nullptr),
33 num_workers_paused_(0),
34 epoch_sync_flag_(false) {
35 // reduce excessive memory usage with high parallelism
36 // when num_workers > 4, reduce op_connector_size to have similar total size if there were only 4 workers
37 constexpr int32_t worker_limit = 4;
38 if (num_workers_ > worker_limit) {
39 oc_queue_size_ = std::max(1, op_connector_size * worker_limit / num_workers_);
40 }
41 }
42
43 // Creates the internal worker connector for the parallel op if the derived class wants to use it
CreateWorkerConnector(int32_t worker_connector_size)44 Status ParallelOp::CreateWorkerConnector(int32_t worker_connector_size) {
45 if (worker_connector_size == 0) {
46 RETURN_STATUS_UNEXPECTED("Create Worker Connector failed, as given connector size 0 is invalid.");
47 }
48 num_producers_ = 1;
49 worker_connector_size_ = worker_connector_size;
50 // Instantiate the worker connector. This is the internal connector, not the operators
51 // output connector. It has single master consuming from it (num producers is 1), and the number
52 // of workers is the defined count from the op.
53 worker_connector_ = std::make_unique<DbConnector>(num_workers_, num_producers_, worker_connector_size);
54
55 return Status::OK();
56 }
57
58 // A print method typically used for debugging
Print(std::ostream & out,bool show_all) const59 void ParallelOp::Print(std::ostream &out, bool show_all) const {
60 DatasetOp::Print(out, show_all);
61 out << " [workers: " << num_workers_ << "]";
62 }
63
64 // Override base class reset to provide reset actions specific to the ParallelOp class.
Reset()65 Status ParallelOp::Reset() {
66 RETURN_IF_NOT_OK(DatasetOp::Reset()); // Perform any super class reset work
67
68 // ParallelOp is abstract, but we do own the connector between workers and master
69 // (if the parallel op is configured for this). Reset that connector here.
70 if (worker_connector_) {
71 worker_connector_->Reset();
72 }
73
74 return Status::OK();
75 }
76
77 // Register the internal worker connectors
RegisterWorkerConnectors()78 Status ParallelOp::RegisterWorkerConnectors() {
79 if (worker_connector_) {
80 return (worker_connector_->Register(tree_->AllTasks()));
81 }
82 return Status::OK();
83 }
84
WaitForWorkers()85 Status ParallelOp::WaitForWorkers() {
86 num_workers_paused_ = 0;
87 for (int32_t i = 0; i < num_workers_; i++) {
88 RETURN_IF_NOT_OK(io_block_queues_[i]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagWait)));
89 }
90 RETURN_IF_NOT_OK(wait_for_workers_post_.Wait());
91 wait_for_workers_post_.Clear();
92 return Status::OK();
93 }
94 } // namespace dataset
95 } // namespace mindspore
96