# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Launch local server processes that each run ``cloud_train.py`` in the background.

One ``bash -c`` command is built and started per local server. Every server
gets its own freshly recreated ``server_<index>/`` directory under the current
working directory, and its output is redirected to ``server.log`` inside it.
"""

import argparse
import ast
import os
import shlex
import subprocess
import time


def build_parser():
    """Return the argument parser holding every option this launcher accepts."""
    parser = argparse.ArgumentParser(description="Run train_cloud.py case")
    parser.add_argument("--device_target", type=str, default="CPU")
    parser.add_argument("--server_mode", type=str, default="FEDERATED_LEARNING")
    parser.add_argument("--worker_num", type=int, default=0)
    parser.add_argument("--server_num", type=int, default=2)
    parser.add_argument("--scheduler_ip", type=str, default="127.0.0.1")
    parser.add_argument("--scheduler_port", type=int, default=8113)
    parser.add_argument("--fl_server_port", type=int, default=6666)
    parser.add_argument("--start_fl_job_threshold", type=int, default=1)
    parser.add_argument("--start_fl_job_time_window", type=int, default=3000)
    parser.add_argument("--update_model_ratio", type=float, default=1.0)
    parser.add_argument("--update_model_time_window", type=int, default=3000)
    parser.add_argument("--fl_name", type=str, default="Lenet")
    parser.add_argument("--fl_iteration_num", type=int, default=25)
    parser.add_argument("--client_epoch_num", type=int, default=20)
    parser.add_argument("--client_batch_size", type=int, default=32)
    parser.add_argument("--client_learning_rate", type=float, default=0.1)
    # The number of servers that this script will launch.
    parser.add_argument("--local_server_num", type=int, default=-1)
    parser.add_argument("--encrypt_type", type=str, default="NOT_ENCRYPT")
    # parameters for encrypt_type='DP_ENCRYPT'
    parser.add_argument("--dp_eps", type=float, default=50.0)
    parser.add_argument("--dp_delta", type=float, default=0.01)  # 1/worker_num
    parser.add_argument("--dp_norm_clip", type=float, default=1.0)
    # parameters for encrypt_type='PW_ENCRYPT'
    parser.add_argument("--share_secrets_ratio", type=float, default=1.0)
    parser.add_argument("--cipher_time_window", type=int, default=300000)
    parser.add_argument("--reconstruct_secrets_threshold", type=int, default=3)
    parser.add_argument("--client_password", type=str, default="")
    parser.add_argument("--server_password", type=str, default="")
    parser.add_argument("--enable_ssl", type=ast.literal_eval, default=False)
    return parser


def build_server_command(args, index):
    """Build the bash command line that starts server number ``index``.

    Args:
        args: Parsed namespace produced by the parser from ``build_parser``.
        index: Zero-based index of the server. It selects the ``server_<index>/``
            working directory and is added to ``args.fl_server_port`` so every
            local server is given a distinct port.

    Returns:
        A single string intended to be run with ``bash -c``. The command ends
        with ``&``, so bash puts the server in the background.
    """
    server_dir = "${execute_path}/server_" + str(index) + "/"

    # Order matches the order in which the options have always been forwarded.
    options = [
        ("device_target", args.device_target),
        ("server_mode", args.server_mode),
        ("ms_role", "MS_SERVER"),
        ("worker_num", args.worker_num),
        ("server_num", args.server_num),
        ("scheduler_ip", args.scheduler_ip),
        ("scheduler_port", args.scheduler_port),
        ("fl_server_port", args.fl_server_port + index),
        ("start_fl_job_threshold", args.start_fl_job_threshold),
        ("start_fl_job_time_window", args.start_fl_job_time_window),
        ("update_model_ratio", args.update_model_ratio),
        ("update_model_time_window", args.update_model_time_window),
        ("fl_name", args.fl_name),
        ("fl_iteration_num", args.fl_iteration_num),
        ("client_epoch_num", args.client_epoch_num),
        ("client_batch_size", args.client_batch_size),
        ("client_learning_rate", args.client_learning_rate),
        ("dp_eps", args.dp_eps),
        ("dp_delta", args.dp_delta),
        ("dp_norm_clip", args.dp_norm_clip),
        ("encrypt_type", args.encrypt_type),
        ("share_secrets_ratio", args.share_secrets_ratio),
        ("cipher_time_window", args.cipher_time_window),
        ("client_password", args.client_password),
        ("server_password", args.server_password),
        ("enable_ssl", args.enable_ssl),
        ("reconstruct_secrets_threshold", args.reconstruct_secrets_threshold),
    ]

    # NOTE(review): ``script_self`` is not set anywhere in this file, so
    # ``self_path`` depends on whatever the calling environment exports -
    # confirm where cloud_train.py is expected to be found.
    pieces = [
        "execute_path=$(pwd) && self_path=$(dirname \"${script_self}\") && ",
        "rm -rf " + server_dir + " &&",
        "mkdir " + server_dir + " &&",
        "cd " + server_dir + " || exit && export GLOG_v=1 &&",
        "python ${self_path}/../cloud_train.py",
    ]
    # Quote every value: the command is interpreted by bash, so an unquoted
    # password or name containing spaces or shell metacharacters would split
    # or alter the command. Shell-safe values are left untouched by quote().
    pieces.extend(
        " --" + name + "=" + shlex.quote(str(value)) for name, value in options
    )
    pieces.append(" > server.log 2>&1 &")
    return "".join(pieces)


def launch_servers(args):
    """Start the requested number of local servers.

    Args:
        args: Parsed namespace produced by the parser from ``build_parser``.
            ``args.local_server_num == -1`` means "launch ``args.server_num``
            servers".

    Raises:
        ValueError: If more local servers are requested than ``server_num``.
    """
    local_server_num = args.local_server_num
    if local_server_num == -1:
        local_server_num = args.server_num

    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if local_server_num > args.server_num:
        raise ValueError("The local server number should not be bigger than total server number.")

    for index in range(local_server_num):
        # The child process inherits the environment, so each server sees its own id.
        os.environ['MS_NODE_ID'] = str(index)
        cmd_server = build_server_command(args, index)

        # Short pause before each launch so the servers do not start simultaneously.
        time.sleep(0.3)
        subprocess.call(['bash', '-c', cmd_server])


def main():
    """Parse the command line (ignoring unknown options) and launch the servers."""
    args, _ = build_parser().parse_known_args()
    launch_servers(args)


if __name__ == "__main__":
    main()