• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2021 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ============================================================================
15
16import argparse
17import subprocess
18import os
19import ast
20
def _build_parser():
    """Create the command-line parser for the server launcher.

    Each entry of the option table below is registered as ``--<name>`` with
    the listed type converter and default value, in table order.

    Returns:
        argparse.ArgumentParser: the fully configured parser.
    """
    option_table = (
        # (option name, type converter, default value)
        ("device_target", str, "CPU"),
        ("server_mode", str, "FEDERATED_LEARNING"),
        ("worker_num", int, 0),
        ("server_num", int, 2),
        ("scheduler_ip", str, "127.0.0.1"),
        ("scheduler_port", int, 8113),
        ("fl_server_port", int, 6666),
        ("start_fl_job_threshold", int, 1),
        ("start_fl_job_time_window", int, 3000),
        ("update_model_ratio", float, 1.0),
        ("update_model_time_window", int, 3000),
        ("fl_name", str, "Lenet"),
        ("fl_iteration_num", int, 25),
        ("client_epoch_num", int, 20),
        ("client_batch_size", int, 32),
        ("client_learning_rate", float, 0.1),
        # The number of servers that this script will launch.
        ("local_server_num", int, -1),
        ("encrypt_type", str, "NOT_ENCRYPT"),
        # Parameters for encrypt_type='DP_ENCRYPT'.
        ("dp_eps", float, 50.0),
        ("dp_delta", float, 0.01),  # 1/worker_num
        ("dp_norm_clip", float, 1.0),
        # Parameters for encrypt_type='PW_ENCRYPT'.
        ("share_secrets_ratio", float, 1.0),
        ("cipher_time_window", int, 300000),
        ("reconstruct_secrets_threshold", int, 3),
        ("client_password", str, ""),
        ("server_password", str, ""),
        # Parsed as a Python literal, so pass True / False on the command line.
        ("enable_ssl", ast.literal_eval, False),
    )

    built = argparse.ArgumentParser(description="Run train_cloud.py case")
    for option_name, converter, default_value in option_table:
        built.add_argument("--" + option_name, type=converter, default=default_value)
    return built


parser = _build_parser()
52
# Parse the command line; flags the parser does not recognise are collected
# into the second return value and not used here.
args, _ = parser.parse_known_args()

# Copy every parsed option into a module-level name of the same spelling so
# the rest of the script can refer to it directly.

# Execution target and mode.
device_target, server_mode = args.device_target, args.server_mode

# Cluster size, and how many of the servers this script starts itself.
worker_num, server_num, local_server_num = (
    args.worker_num,
    args.server_num,
    args.local_server_num,
)

# Network endpoints.
scheduler_ip, scheduler_port, fl_server_port = (
    args.scheduler_ip,
    args.scheduler_port,
    args.fl_server_port,
)

# Federated-learning round configuration.
start_fl_job_threshold, start_fl_job_time_window = (
    args.start_fl_job_threshold,
    args.start_fl_job_time_window,
)
update_model_ratio, update_model_time_window = (
    args.update_model_ratio,
    args.update_model_time_window,
)
fl_name, fl_iteration_num = args.fl_name, args.fl_iteration_num

# Client-side training hyper-parameters.
client_epoch_num, client_batch_size, client_learning_rate = (
    args.client_epoch_num,
    args.client_batch_size,
    args.client_learning_rate,
)

# Encryption mode and its mode-specific settings.
encrypt_type = args.encrypt_type
dp_eps, dp_delta, dp_norm_clip = args.dp_eps, args.dp_delta, args.dp_norm_clip
share_secrets_ratio, cipher_time_window, reconstruct_secrets_threshold = (
    args.share_secrets_ratio,
    args.cipher_time_window,
    args.reconstruct_secrets_threshold,
)

# Credentials and transport security.
client_password, server_password, enable_ssl = (
    args.client_password,
    args.server_password,
    args.enable_ssl,
)
81
# -1 means "not specified": fall back to launching every server from this script.
if local_server_num == -1:
    local_server_num = server_num

# Validate with an explicit raise instead of `assert`, so the check is still
# enforced when Python runs with -O (which strips assert statements).
if local_server_num > server_num:
    raise ValueError("The local server number should not be bigger than total server number.")
86
# Imported once, ahead of the loop, rather than on every iteration.
import shlex
import time


def _server_command(index, options):
    """Build the bash command line that starts one server in the background.

    The command recreates ``server_<index>/`` under the current working
    directory (any existing directory of that name is removed first), changes
    into it, exports ``GLOG_v=1`` and runs ``cloud_train.py`` with *options*,
    redirecting stdout and stderr to ``server.log`` in that directory.

    Args:
        index: zero-based number of the server; used in the directory name.
        options: iterable of ``(name, value)`` pairs, rendered in order as
            ``--name=value``.

    Returns:
        str: a command string suitable for ``bash -c``.
    """
    workdir = "${execute_path}/server_" + str(index) + "/"
    # Quote every value: they come straight from the command line (passwords
    # included) and would otherwise be re-interpreted by bash. Plain numbers
    # and simple identifiers pass through shlex.quote unchanged.
    rendered_options = "".join(
        " --" + name + "=" + shlex.quote(str(value)) for name, value in options
    )
    pieces = [
        # NOTE(review): `script_self` is not defined anywhere in this command;
        # unless the calling environment exports it, dirname receives an empty
        # string. Confirm the intended location of cloud_train.py.
        "execute_path=$(pwd) && self_path=$(dirname \"${script_self}\") && ",
        "rm -rf " + workdir + " &&",
        "mkdir " + workdir + " &&",
        "cd " + workdir + " || exit && export GLOG_v=1 &&",
        "python ${self_path}/../cloud_train.py",
        rendered_options,
        " > server.log 2>&1 &",
    ]
    return "".join(pieces)


for i in range(local_server_num):
    # Set in this process's environment so the spawned bash/python inherit it.
    os.environ['MS_NODE_ID'] = str(i)
    cmd_server = _server_command(i, [
        ("device_target", device_target),
        ("server_mode", server_mode),
        ("ms_role", "MS_SERVER"),
        ("worker_num", worker_num),
        ("server_num", server_num),
        ("scheduler_ip", scheduler_ip),
        ("scheduler_port", scheduler_port),
        # Each locally launched server gets its own port, offset by its index.
        ("fl_server_port", fl_server_port + i),
        ("start_fl_job_threshold", start_fl_job_threshold),
        ("start_fl_job_time_window", start_fl_job_time_window),
        ("update_model_ratio", update_model_ratio),
        ("update_model_time_window", update_model_time_window),
        ("fl_name", fl_name),
        ("fl_iteration_num", fl_iteration_num),
        ("client_epoch_num", client_epoch_num),
        ("client_batch_size", client_batch_size),
        ("client_learning_rate", client_learning_rate),
        ("dp_eps", dp_eps),
        ("dp_delta", dp_delta),
        ("dp_norm_clip", dp_norm_clip),
        ("encrypt_type", encrypt_type),
        ("share_secrets_ratio", share_secrets_ratio),
        ("cipher_time_window", cipher_time_window),
        ("client_password", client_password),
        ("server_password", server_password),
        ("enable_ssl", enable_ssl),
        ("reconstruct_secrets_threshold", reconstruct_secrets_threshold),
    ])

    # Short pause before each launch, as in the original script.
    time.sleep(0.3)
    # The command ends with `&`, so bash returns as soon as the server is spawned.
    subprocess.call(['bash', '-c', cmd_server])
126