# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Conversion interface for the second-order optimizer thor.
"""
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore import context


class ConvertNetUtils:
    """
    Convert a network to a thor layer network.
    """
    def __init__(self):
        # Maps an ordinary layer type to the static method that converts it
        # into its thor counterpart.
        self._convert_method_map = {nn.Dense: ConvertNetUtils._convert_dense,
                                    nn.Embedding: ConvertNetUtils._convert_embedding,
                                    nn.Conv2d: ConvertNetUtils._convert_conv2d,
                                    nn.EmbeddingLookup: ConvertNetUtils._convert_embeddinglookup}

    @staticmethod
    def _convert_dense(subcell):
        """
        Convert a dense cell to a second-order cell.
        """
        weight = subcell.weight
        act_name = None
        if subcell.activation_flag:
            act_class = subcell.activation.__class__.__name__
            act_name = act_class.lower()
            if act_name == "fastgelu":
                act_name = "fast_gelu"
        if subcell.out_channels == 1001:
            # The final 1001-way classification layer stays in default precision.
            new_subcell = nn.DenseThor(in_channels=subcell.in_channels,
                                       out_channels=subcell.out_channels,
                                       weight_init=weight,
                                       has_bias=subcell.has_bias,
                                       bias_init='zeros',
                                       activation=act_name)
        else:
            compute_type = mstype.float16
            if context.get_context("device_target") == "GPU":
                compute_type = mstype.float32
            new_subcell = nn.DenseThor(in_channels=subcell.in_channels,
                                       out_channels=subcell.out_channels,
                                       weight_init=weight,
                                       has_bias=subcell.has_bias,
                                       bias_init='zeros',
                                       activation=act_name).to_float(compute_type)

        if subcell.has_bias:
            new_subcell.bias = subcell.bias
        return new_subcell

    @staticmethod
    def _convert_embedding(subcell):
        """
        Convert an embedding cell to a second-order cell.
        """
        new_subcell = nn.EmbeddingThor(vocab_size=subcell.vocab_size,
                                       embedding_size=subcell.embedding_size,
                                       use_one_hot=False)
        new_subcell.embedding_table = subcell.embedding_table
        return new_subcell

    @staticmethod
    def _convert_embeddinglookup(subcell):
        """
        Convert an embedding lookup cell to a second-order cell.
        """
        new_subcell = nn.EmbeddingLookupThor(vocab_size=subcell.vocab_size,
                                             embedding_size=subcell.embedding_size,
                                             target=subcell.target, sparse=subcell.sparse,
                                             vocab_cache_size=subcell.vocab_cache_size)
        new_subcell.embedding_table = subcell.embedding_table
        return new_subcell

    @staticmethod
    def _convert_conv2d(subcell):
        """
        Convert a conv2d cell to a second-order cell.
        """
        out_channel = subcell.out_channels
        in_channel = subcell.in_channels
        kernel_size = subcell.kernel_size[0]
        stride = subcell.stride
        padding = subcell.padding
        pad_mode = subcell.pad_mode
        has_bias = subcell.has_bias
        weight = subcell.weight

        new_subcell = nn.Conv2dThor(in_channel, out_channel,
                                    kernel_size=kernel_size, stride=stride, padding=padding, pad_mode=pad_mode,
                                    has_bias=has_bias, weight_init=weight)
        return new_subcell
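    # Dispatch sketch (illustrative comments only, not executed): a plain layer is
    # converted by looking up its type in _convert_method_map, e.g. assuming a
    # hypothetical dense layer:
    #
    #     dense = nn.Dense(256, 10, activation='relu')
    #     thor_dense = ConvertNetUtils()._convert_method_map[nn.Dense](dense)
    #     # thor_dense is an nn.DenseThor reusing dense's weight, bias and activation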
    def _need_change(self, subcell, prefix):
        """Check whether a subcell should be converted to a thor layer."""
        if isinstance(subcell, (nn.Dense, nn.Conv2d)) and subcell.weight.requires_grad:
            # Layers whose parameter prefix matches these patterns are kept as-is.
            if "rpn_with_loss.rpn_convs_list." in prefix.lower() or "wide" in prefix.lower():
                return False
            return True
        if isinstance(subcell, (nn.Embedding, nn.EmbeddingLookup)) and subcell.embedding_table.requires_grad:
            return True
        return False

    def _convert_to_thor_net(self, net):
        """
        Convert the layers of a network to thor layers, recursively.
        """
        cells = net.name_cells()
        change = False
        for name in cells:
            subcell = cells[name]
            if subcell == net:
                continue
            elif isinstance(subcell, (nn.DenseThor, nn.Conv2dThor, nn.EmbeddingThor, nn.EmbeddingLookupThor)):
                continue
            elif isinstance(subcell, (nn.Conv2dTranspose, nn.Conv1d, nn.Conv1dTranspose, nn.BatchNorm1d,
                                      nn.GroupNorm, nn.GlobalBatchNorm, nn.LayerNorm, nn.BatchNorm2d,
                                      nn.MaxPool2d)):
                continue
            elif isinstance(subcell, (nn.Embedding, nn.Dense, nn.Conv2d, nn.EmbeddingLookup)):
                prefix = subcell.param_prefix
                if self._need_change(subcell, prefix):
                    new_subcell = self._convert_method_map[type(subcell)](subcell)
                    new_subcell.update_parameters_name(prefix + '.')
                    net.insert_child_to_cell(name, new_subcell)
                    change = True
            else:
                self._convert_to_thor_net(subcell)

        if isinstance(net, nn.SequentialCell) and change:
            net.cell_list = list(net.cells())

    def convert_to_thor_net(self, net):
        """
        This interface is used to convert a network to a thor layer network, in order to calculate and store
        the second-order information matrix.

        Note:
            This interface is automatically called by the second-order optimizer thor.

        Args:
            net (Cell): Network to be trained by the second-order optimizer thor.

        Supported Platforms:
            ``Ascend`` ``GPU``

        Examples:
            >>> ConvertNetUtils().convert_to_thor_net(net)
        """
        net.update_cell_prefix()
        self._convert_to_thor_net(net)
        net.update_cell_type("second-order")
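
# Usage sketch (comments only; `resnet50` is a hypothetical network factory, not
# defined in this module):
#
#     net = resnet50(class_num=1001)
#     ConvertNetUtils().convert_to_thor_net(net)
#     # Trainable Dense/Conv2d/Embedding/EmbeddingLookup subcells of `net` are
#     # now replaced in place by their thor counterparts.
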

class ConvertModelUtils:
    """
    Convert a model to a thor model.
    """
    @staticmethod
    def convert_to_thor_model(model, network, loss_fn=None, optimizer=None, metrics=None, amp_level="O0",
                              loss_scale_manager=None, keep_batchnorm_fp32=False):
        """
        This interface is used to convert a model to a thor model.

        Args:
            model (Object): High-level API for training.
                `Model` groups layers into an object with training features.
            network (Cell): A training network.
            loss_fn (Cell): Objective function. Default: None.
            optimizer (Cell): Optimizer used to update the weights. Default: None.
            metrics (Union[dict, set]): A dictionary or a set of metrics to be evaluated by the model during
                training, e.g. {'accuracy', 'recall'}. Default: None.
            amp_level (str): Level for mixed precision training. Supports ["O0", "O2", "O3", "auto"].
                Default: "O0".

                - O0: Do not change.
                - O2: Cast the network to float16, keep batchnorm running in float32, and use dynamic loss scale.
                - O3: Cast the network to float16, with the additional property 'keep_batchnorm_fp32=False'.
                - auto: Set the level to the recommended level for the device: O2 on GPU, O3 on Ascend.
                  The recommended levels are based on expert experience and do not always generalize;
                  users should specify the level for special networks.

            loss_scale_manager (Union[None, LossScaleManager]): If None, the loss is not scaled. Otherwise,
                the loss is scaled by the LossScaleManager, and the optimizer must not be None. It is a
                keyword argument, e.g. `loss_scale_manager=None`. Default: None.
            keep_batchnorm_fp32 (bool): Keep batchnorm running in `float32`. If True, it overrides the
                previous level setting. Default: False.

        Returns:
            model (Object): High-level API for training.
            `Model` groups layers into an object with training features.

        Supported Platforms:
            ``Ascend`` ``GPU``

        Examples:
            >>> from mindspore import nn
            >>> from mindspore import Tensor
            >>> import mindspore.common.dtype as mstype
            >>> from mindspore.nn import thor
            >>> from mindspore import Model
            >>> from mindspore import FixedLossScaleManager
            >>> from mindspore.train.callback import LossMonitor
            >>> from mindspore.train.train_thor import ConvertModelUtils
            >>>
            >>> net = Net()
            >>> dataset = create_dataset()
            >>> temp = Tensor([4e-4, 1e-4, 1e-5, 1e-5], mstype.float32)
            >>> opt = thor(net, learning_rate=temp, damping=temp, momentum=0.9, loss_scale=128, frequency=4)
            >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
            >>> loss_scale = FixedLossScaleManager(128, drop_overflow_update=False)
            >>> model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'},
            ...               amp_level="O2", keep_batchnorm_fp32=False)
            >>> model = ConvertModelUtils.convert_to_thor_model(model=model, network=net, loss_fn=loss, optimizer=opt,
            ...                                                 loss_scale_manager=loss_scale, metrics={'acc'},
            ...                                                 amp_level="O2", keep_batchnorm_fp32=False)
            >>> loss_cb = LossMonitor()
            >>> model.train(1, dataset, callbacks=loss_cb, sink_size=4, dataset_sink_mode=True)
        """
        optim_name = type(optimizer).__name__
        if optim_name in ("ThorAscend", "ThorGpu"):
            from .model_thor import ModelThor
            if isinstance(network, nn.TrainOneStepCell):
                model = ModelThor(network=network)
            else:
                model = ModelThor(network=network, loss_fn=loss_fn, optimizer=optimizer, amp_level=amp_level,
                                  loss_scale_manager=loss_scale_manager,
                                  keep_batchnorm_fp32=keep_batchnorm_fp32, metrics=metrics)

        return model
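
# Behavior note (sketch): when `optimizer` is not a thor optimizer, i.e. its type
# name is neither "ThorAscend" nor "ThorGpu", convert_to_thor_model returns the
# input `model` unchanged, so the call is a safe no-op in non-thor scripts
# (`momentum_opt` below is a hypothetical nn.Momentum instance):
#
#     model = ConvertModelUtils.convert_to_thor_model(model=model, network=net,
#                                                     loss_fn=loss, optimizer=momentum_opt)
#     # `model` is the same object that was passed in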