# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Configuration script for federated ALBERT training and evaluation."""

from easydict import EasyDict as edict
from mindspore.common import dtype as mstype
from src.model import AlbertConfig

# Gradient clipping settings: clipping mode (clip_type) and threshold
# (clip_value) applied to gradients during training.
gradient_cfg = edict({
    'clip_type': 1,
    'clip_value': 1.0
})

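# A minimal illustrative sketch (not part of the original file) of how
# clip_type/clip_value are commonly consumed in MindSpore training cells:
# type 0 clips each gradient by value, any other type clips by norm (shown
# here as global-norm clipping; the actual train cell may clip per gradient).
def _clip_grads_example(grads):
    """Clip a tuple of gradient Tensors according to gradient_cfg."""
    from mindspore import Tensor, ops
    if gradient_cfg.clip_type == 0:
        return tuple(ops.clip_by_value(g,
                                       Tensor(-gradient_cfg.clip_value, g.dtype),
                                       Tensor(gradient_cfg.clip_value, g.dtype))
                     for g in grads)
    return ops.clip_by_global_norm(grads, clip_norm=gradient_cfg.clip_value)
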
train_cfg = edict({
    'batch_size': 16,
    # Dynamic loss-scale settings for mixed-precision training.
    'loss_scale_value': 2 ** 16,
    'scale_factor': 2,
    'scale_window': 50,
    'max_global_epoch': 10,  # corresponds to fl_iteration_num
    # Server-side fine-tuning settings.
    'server_cfg': edict({
        'learning_rate': 1e-5,
        'max_local_epoch': 1,
        'cyclic_trunc': False
    }),
    # Client-side (on-device) training settings.
    'client_cfg': edict({
        'learning_rate': 1e-5,
        'max_local_epoch': 1,
        'num_per_epoch': 20,
        'cyclic_trunc': True
    }),
    'optimizer_cfg': edict({
        'AdamWeightDecay': edict({
            'end_learning_rate': 1e-14,
            'power': 1.0,
            'weight_decay': 1e-4,
            'eps': 1e-6,
            # Exclude LayerNorm and bias parameters from weight decay.
            'decay_filter': lambda x: 'norm' not in x.name.lower() and 'bias' not in x.name.lower(),
            'warmup_ratio': 0.1
        }),
    }),
})

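# A minimal illustrative sketch (not part of the original file) of how the
# fields above map onto MindSpore primitives. `params` and `steps_per_epoch`
# are placeholder arguments; the project's training scripts may wire these
# differently (e.g. adding a warmup phase covering the first
# warmup_ratio * total_steps steps).
def _build_train_components_example(params, steps_per_epoch):
    from mindspore import nn
    # Dynamic loss scaling: start at loss_scale_value, scale up/down by
    # scale_factor, re-evaluated every scale_window steps.
    scale_update = nn.DynamicLossScaleUpdateCell(
        loss_scale_value=train_cfg.loss_scale_value,
        scale_factor=train_cfg.scale_factor,
        scale_window=train_cfg.scale_window)
    opt_cfg = train_cfg.optimizer_cfg.AdamWeightDecay
    total_steps = steps_per_epoch * train_cfg.max_global_epoch
    # Polynomial decay from the server learning rate down to end_learning_rate.
    lr = nn.PolynomialDecayLR(
        learning_rate=train_cfg.server_cfg.learning_rate,
        end_learning_rate=opt_cfg.end_learning_rate,
        decay_steps=total_steps,
        power=opt_cfg.power)
    # decay_filter keeps LayerNorm/bias parameters out of weight decay.
    decay_params = [p for p in params if opt_cfg.decay_filter(p)]
    other_params = [p for p in params if not opt_cfg.decay_filter(p)]
    group_params = [
        {'params': decay_params, 'weight_decay': opt_cfg.weight_decay},
        {'params': other_params, 'weight_decay': 0.0},
    ]
    optimizer = nn.AdamWeightDecay(group_params, learning_rate=lr, eps=opt_cfg.eps)
    return optimizer, scale_update
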
# Evaluation settings.
eval_cfg = edict({
    'batch_size': 256,
})

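# Usage note (illustrative): the evaluation batch size is normally applied
# when batching the evaluation dataset in the eval scripts, along the lines
# of `ds = ds.batch(eval_cfg.batch_size)` with mindspore.dataset.
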
# ALBERT architecture used by the server-side network.
server_net_cfg = AlbertConfig(
    seq_length=8,
    vocab_size=11682,
    hidden_size=312,
    num_hidden_groups=1,  # ALBERT cross-layer parameter-sharing groups
    num_hidden_layers=4,
    inner_group_num=1,
    num_attention_heads=12,
    intermediate_size=1248,
    hidden_act="gelu",
    query_act=None,
    key_act=None,
    value_act=None,
    hidden_dropout_prob=0.0,
    attention_probs_dropout_prob=0.0,
    max_position_embeddings=512,
    type_vocab_size=2,
    initializer_range=0.02,
    use_relative_positions=False,
    classifier_dropout_prob=0.0,
    embedding_size=128,  # ALBERT factorized embedding, smaller than hidden_size
    layer_norm_eps=1e-12,
    has_attention_mask=True,
    do_return_2d_tensor=True,
    use_one_hot_embeddings=False,
    use_token_type=True,
    return_all_encoders=False,
    output_attentions=False,
    output_hidden_states=False,
    dtype=mstype.float32,
    compute_type=mstype.float32,
    is_training=True,
    num_labels=4,
    use_word_embeddings=True
)

# Client-side model configuration: identical architecture to the server-side
# configuration above.
client_net_cfg = AlbertConfig(
    seq_length=8,
    vocab_size=11682,
    hidden_size=312,
    num_hidden_groups=1,
    num_hidden_layers=4,
    inner_group_num=1,
    num_attention_heads=12,
    intermediate_size=1248,
    hidden_act="gelu",
    query_act=None,
    key_act=None,
    value_act=None,
    hidden_dropout_prob=0.0,
    attention_probs_dropout_prob=0.0,
    max_position_embeddings=512,
    type_vocab_size=2,
    initializer_range=0.02,
    use_relative_positions=False,
    classifier_dropout_prob=0.0,
    embedding_size=128,
    layer_norm_eps=1e-12,
    has_attention_mask=True,
    do_return_2d_tensor=True,
    use_one_hot_embeddings=False,
    use_token_type=True,
    return_all_encoders=False,
    output_attentions=False,
    output_hidden_states=False,
    dtype=mstype.float32,
    compute_type=mstype.float32,
    is_training=True,
    num_labels=4,
    use_word_embeddings=True
)

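# A minimal illustrative sketch (not part of the original file) of how these
# configs are consumed when the networks are built. `AlbertModelCLS` is an
# assumed class name for the classifier network in src.model; check that
# module for the actual name.
def _build_networks_example():
    # Deferred import so this sketch cannot break module import if the
    # assumed class name differs from the real one.
    from src.model import AlbertModelCLS  # assumed name
    server_network = AlbertModelCLS(server_net_cfg)
    client_network = AlbertModelCLS(client_net_cfg)
    return server_network, client_network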