# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Configuration script for the federated learning ALBERT example."""

from easydict import EasyDict as edict
from mindspore.common import dtype as mstype
from src.model import AlbertConfig


# Gradient clipping settings; following the MindSpore ModelZoo convention,
# clip_type 1 clips gradients by norm (0 clips by value) at clip_value.
gradient_cfg = edict({
    'clip_type': 1,
    'clip_value': 1.0
})


train_cfg = edict({
    'batch_size': 16,
    # Dynamic loss scaling for mixed precision: initial scale, growth factor,
    # and the step window after which the scale is increased.
    'loss_scale_value': 2 ** 16,
    'scale_factor': 2,
    'scale_window': 50,
    'max_global_epoch': 10,  # fl_iteration_num: number of federated learning iterations
    # Local training settings on the server side.
    'server_cfg': edict({
        'learning_rate': 1e-5,
        'max_local_epoch': 1,
        'cyclic_trunc': False
    }),
    # Local training settings on the client side.
    'client_cfg': edict({
        'learning_rate': 1e-5,
        'max_local_epoch': 1,
        'num_per_epoch': 20,
        'cyclic_trunc': True
    }),
    'optimizer_cfg': edict({
        'AdamWeightDecay': edict({
            'end_learning_rate': 1e-14,  # final LR of the polynomial decay schedule
            'power': 1.0,  # power of the polynomial decay
            'weight_decay': 1e-4,
            'eps': 1e-6,
            # Exclude LayerNorm and bias parameters from weight decay.
            'decay_filter': lambda x: 'norm' not in x.name.lower() and 'bias' not in x.name.lower(),
            'warmup_ratio': 0.1  # fraction of total steps used for LR warmup
        }),
    }),
})

eval_cfg = edict({
    'batch_size': 256,
})

server_net_cfg = AlbertConfig(
    seq_length=8,
    vocab_size=11682,
    hidden_size=312,
    num_hidden_groups=1,
    num_hidden_layers=4,
    inner_group_num=1,
    num_attention_heads=12,
    intermediate_size=1248,
    hidden_act="gelu",
    query_act=None,
    key_act=None,
    value_act=None,
    hidden_dropout_prob=0.0,
    attention_probs_dropout_prob=0.0,
    max_position_embeddings=512,
    type_vocab_size=2,
    initializer_range=0.02,
    use_relative_positions=False,
    classifier_dropout_prob=0.0,
    embedding_size=128,
    layer_norm_eps=1e-12,
    has_attention_mask=True,
    do_return_2d_tensor=True,
    use_one_hot_embeddings=False,
    use_token_type=True,
    return_all_encoders=False,
    output_attentions=False,
    output_hidden_states=False,
    dtype=mstype.float32,
    compute_type=mstype.float32,
    is_training=True,
    num_labels=4,
    use_word_embeddings=True
)

# The client-side model configuration is identical to the server-side one;
# it is kept separate so the two can be tuned independently.
client_net_cfg = AlbertConfig(
    seq_length=8,
    vocab_size=11682,
    hidden_size=312,
    num_hidden_groups=1,
    num_hidden_layers=4,
    inner_group_num=1,
    num_attention_heads=12,
    intermediate_size=1248,
    hidden_act="gelu",
    query_act=None,
    key_act=None,
    value_act=None,
    hidden_dropout_prob=0.0,
    attention_probs_dropout_prob=0.0,
    max_position_embeddings=512,
    type_vocab_size=2,
    initializer_range=0.02,
    use_relative_positions=False,
    classifier_dropout_prob=0.0,
    embedding_size=128,
    layer_norm_eps=1e-12,
    has_attention_mask=True,
    do_return_2d_tensor=True,
    use_one_hot_embeddings=False,
    use_token_type=True,
    return_all_encoders=False,
    output_attentions=False,
    output_hidden_states=False,
    dtype=mstype.float32,
    compute_type=mstype.float32,
    is_training=True,
    num_labels=4,
    use_word_embeddings=True
)
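
# ----------------------------------------------------------------------------
# Illustrative sketch only (not part of the training pipeline): a minimal demo
# of how the values above are typically consumed, assuming the usual MindSpore
# conventions (nn.PolynomialDecayLR + nn.AdamWeightDecay for the optimizer,
# nn.ClipByNorm for clip_type 1). The toy network and `total_steps` below are
# hypothetical stand-ins; the real training code builds the ALBERT model from
# server_net_cfg / client_net_cfg and derives step counts from the dataset.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
    import numpy as np
    from mindspore import Tensor, nn

    opt_cfg = train_cfg.optimizer_cfg.AdamWeightDecay

    # Toy network standing in for the ALBERT model (hypothetical).
    net = nn.Dense(8, 4)

    # Split parameters into decayed / non-decayed groups with decay_filter:
    # LayerNorm and bias parameters are excluded from weight decay.
    params = net.trainable_params()
    decay_params = [p for p in params if opt_cfg.decay_filter(p)]
    other_params = [p for p in params if not opt_cfg.decay_filter(p)]
    group_params = [
        {'params': decay_params, 'weight_decay': opt_cfg.weight_decay},
        {'params': other_params, 'weight_decay': 0.0},
    ]

    # Polynomial decay from the configured learning rate to end_learning_rate.
    total_steps = 1000  # hypothetical; depends on dataset size in practice
    lr_schedule = nn.PolynomialDecayLR(
        learning_rate=train_cfg.server_cfg.learning_rate,
        end_learning_rate=opt_cfg.end_learning_rate,
        decay_steps=total_steps,
        power=opt_cfg.power
    )
    optimizer = nn.AdamWeightDecay(group_params, learning_rate=lr_schedule,
                                   eps=opt_cfg.eps)

    # Gradient clipping per gradient_cfg: clip_type 1 -> clip by norm.
    grad = Tensor(np.ones((4, 8), np.float32))
    clipped = nn.ClipByNorm()(grad, Tensor(gradient_cfg.clip_value, mstype.float32))
    print(optimizer, clipped.shape)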