# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
15"""
16Conversion interface for second-order optimizer thor
17"""
18import mindspore.nn as nn
19import mindspore.common.dtype as mstype
20from mindspore import context
21
22
23class ConvertNetUtils:
    """
    Convert a network into an equivalent network built from THOR layers.
    """
    def __init__(self):
        self._convert_method_map = {nn.Dense: ConvertNetUtils._convert_dense,
                                    nn.Embedding: ConvertNetUtils._convert_embedding,
                                    nn.Conv2d: ConvertNetUtils._convert_conv2d,
                                    nn.EmbeddingLookup: ConvertNetUtils._convert_embeddinglookup}

    @staticmethod
    def _convert_dense(subcell):
        """
        Convert a Dense cell to its second-order (THOR) counterpart.
        """
        weight = subcell.weight
        act_name = None
        if subcell.activation_flag:
            act_class = subcell.activation.__class__.__name__
            act_name = act_class.lower()
            # The activation string key for FastGelu is "fast_gelu", so the
            # lowered class name needs remapping.
            if act_name == "fastgelu":
                act_name = "fast_gelu"
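        # 1001 output channels appears to single out the ImageNet classifier
        # head (1000 classes + background) of the reference ResNet scripts;
        # that final layer keeps its original compute precision below.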
        if subcell.out_channels == 1001:
            new_subcell = nn.DenseThor(in_channels=subcell.in_channels,
                                       out_channels=subcell.out_channels,
                                       weight_init=weight,
                                       has_bias=subcell.has_bias,
                                       bias_init='zeros',
                                       activation=act_name)
        else:
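            # All other dense layers are cast to float16 on Ascend for speed;
            # on GPU they stay in float32 (assumption: the GPU THOR kernels
            # expect fp32).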
            compute_type = mstype.float16
            if context.get_context("device_target") == "GPU":
                compute_type = mstype.float32
            new_subcell = nn.DenseThor(in_channels=subcell.in_channels,
                                       out_channels=subcell.out_channels,
                                       weight_init=weight,
                                       has_bias=subcell.has_bias,
                                       bias_init='zeros',
                                       activation=act_name).to_float(compute_type)

        if subcell.has_bias:
            new_subcell.bias = subcell.bias
        return new_subcell

    @staticmethod
    def _convert_embedding(subcell):
        """
        Convert an Embedding cell to its second-order counterpart.
        """
        new_subcell = nn.EmbeddingThor(vocab_size=subcell.vocab_size,
                                       embedding_size=subcell.embedding_size,
                                       use_one_hot=False)
        new_subcell.embedding_table = subcell.embedding_table
        return new_subcell

    @staticmethod
    def _convert_embeddinglookup(subcell):
        """
        Convert an EmbeddingLookup cell to its second-order counterpart.
        """
        new_subcell = nn.EmbeddingLookupThor(vocab_size=subcell.vocab_size,
                                             embedding_size=subcell.embedding_size,
                                             target=subcell.target, sparse=subcell.sparse,
                                             vocab_cache_size=subcell.vocab_cache_size)
        new_subcell.embedding_table = subcell.embedding_table
        return new_subcell

    @staticmethod
    def _convert_conv2d(subcell):
        """
        Convert a Conv2d cell to its second-order counterpart.
        """
        out_channel = subcell.out_channels
        in_channel = subcell.in_channels
        # kernel_size is stored as a (h, w) tuple; only the first entry is
        # used, so square kernels are assumed.
        kernel_size = subcell.kernel_size[0]
        stride = subcell.stride
        padding = subcell.padding
        pad_mode = subcell.pad_mode
        has_bias = subcell.has_bias
        weight = subcell.weight

        new_subcell = nn.Conv2dThor(in_channel, out_channel,
                                    kernel_size=kernel_size, stride=stride, padding=padding, pad_mode=pad_mode,
                                    has_bias=has_bias, weight_init=weight)
        return new_subcell

    def _need_change(self, subcell, prefix):
        """Check whether `subcell` should be replaced by a THOR layer."""
        if isinstance(subcell, (nn.Dense, nn.Conv2d)) and subcell.weight.requires_grad:
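            # Prefix-based exclusions: the RPN conv heads of Faster R-CNN
            # ("rpn_with_loss.rpn_convs_list.") and the wide part of Wide&Deep
            # ("wide") are left as ordinary layers, presumably because THOR
            # statistics are unsupported or unhelpful there.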
            if "rpn_with_loss.rpn_convs_list." in prefix.lower() or "wide" in prefix.lower():
                return False
            return True
        if isinstance(subcell, (nn.Embedding, nn.EmbeddingLookup)) and subcell.embedding_table.requires_grad:
            return True
        return False

    def _convert_to_thor_net(self, net):
        """
        Recursively convert a network's cells to THOR cells in place.
        """
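        # Walk the immediate child cells: convertible leaves (Dense, Conv2d,
        # Embedding, EmbeddingLookup) are swapped for their THOR equivalents,
        # cells that are already THOR or unsupported are skipped, and any
        # other cell is recursed into.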
        cells = net.name_cells()
        change = False
        for name in cells:
            subcell = cells[name]
            if subcell == net:
                continue
            elif isinstance(subcell, (nn.DenseThor, nn.Conv2dThor, nn.EmbeddingThor, nn.EmbeddingLookupThor)):
                continue
            elif isinstance(subcell, (nn.Conv2dTranspose, nn.Conv1d, nn.Conv1dTranspose, nn.BatchNorm1d, nn.GroupNorm,
                                      nn.GlobalBatchNorm, nn.LayerNorm, nn.BatchNorm2d, nn.MaxPool2d)):
                continue
            elif isinstance(subcell, (nn.Embedding, nn.Dense, nn.Conv2d, nn.EmbeddingLookup)):
                prefix = subcell.param_prefix
                if self._need_change(subcell, prefix):
                    new_subcell = self._convert_method_map[type(subcell)](subcell)
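                    # Re-prefix the new cell's parameters so they keep the
                    # original scoped names (e.g. so existing checkpoints and
                    # parameter groups still line up).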
                    new_subcell.update_parameters_name(prefix + '.')
                    net.insert_child_to_cell(name, new_subcell)
                    change = True
            else:
                self._convert_to_thor_net(subcell)

        # SequentialCell caches its children in `cell_list`; rebuild it so the
        # newly inserted THOR cells actually run.
        if isinstance(net, nn.SequentialCell) and change:
            net.cell_list = list(net.cells())

    def convert_to_thor_net(self, net):
        """
        Convert a network into a THOR-layer network so that the second-order
        information matrices can be computed and stored during training.

        Note:
            This interface is called automatically by the second-order optimizer thor.

        Args:
            net (Cell): Network to be trained by the second-order optimizer thor.

        Supported Platforms:
            ``Ascend`` ``GPU``

        Examples:
            >>> # `net` is a user-defined training network, e.g. a ResNet instance.
            >>> ConvertNetUtils().convert_to_thor_net(net)
        """
        net.update_cell_prefix()
        self._convert_to_thor_net(net)
        net.update_cell_type("second-order")


class ConvertModelUtils:
    """
    Convert a model into a THOR model.
    """
    @staticmethod
    def convert_to_thor_model(model, network, loss_fn=None, optimizer=None, metrics=None, amp_level="O0",
                              loss_scale_manager=None, keep_batchnorm_fp32=False):
        """
        This interface is used to convert a model into a THOR model.

        Args:
            model (Object): High-level API for training.
                            `Model` groups layers into an object with training features.
            network (Cell): A training network.
            loss_fn (Cell): Objective function. Default: None.
            optimizer (Cell): Optimizer used for updating the weights. Default: None.
            metrics (Union[dict, set]): A dictionary or a set of metrics to be evaluated by the model during
                                        training, e.g. {'accuracy', 'recall'}. Default: None.
            amp_level (str): Level for mixed-precision training. Supports ["O0", "O2", "O3", "auto"]. Default: "O0".

                - O0: Do not change.
                - O2: Cast the network to float16, keep batchnorm running in float32, use dynamic loss scaling.
                - O3: Cast the network to float16, with the additional property 'keep_batchnorm_fp32=False'.
                - auto: Set the level to the recommended level for the device. O2 is recommended on GPU, O3 on
                  Ascend. The recommendation is based on expert experience and may not generalize to every
                  network; specify the level explicitly for special networks.

            loss_scale_manager (Union[None, LossScaleManager]): If None, the loss is not scaled. Otherwise the
                loss is scaled by the LossScaleManager, and `optimizer` must not be None. It is a keyword
                argument, e.g. use `loss_scale_manager=None` to set the value.
            keep_batchnorm_fp32 (bool): Keep batchnorm running in `float32`. If True, it overrides the earlier
                level setting. Default: False.

        Returns:
            model (Object): High-level API for training.
                            `Model` groups layers into an object with training features.

        Supported Platforms:
            ``Ascend`` ``GPU``

        Examples:
            >>> from mindspore import nn
            >>> from mindspore import Tensor
            >>> import mindspore.common.dtype as mstype
            >>> from mindspore.nn import thor
            >>> from mindspore import Model
            >>> from mindspore import FixedLossScaleManager
            >>> from mindspore.train.callback import LossMonitor
            >>> from mindspore.train.train_thor import ConvertModelUtils
            >>>
            >>> # Net and create_dataset are user-defined.
            >>> net = Net()
            >>> dataset = create_dataset()
            >>> temp = Tensor([4e-4, 1e-4, 1e-5, 1e-5], mstype.float32)
            >>> opt = thor(net, learning_rate=temp, damping=temp, momentum=0.9, loss_scale=128, frequency=4)
            >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
            >>> loss_scale = FixedLossScaleManager(128, drop_overflow_update=False)
            >>> model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'},
            ...               amp_level="O2", keep_batchnorm_fp32=False)
            >>> model = ConvertModelUtils.convert_to_thor_model(model=model, network=net, loss_fn=loss, optimizer=opt,
            ...                                                 loss_scale_manager=loss_scale, metrics={'acc'},
            ...                                                 amp_level="O2", keep_batchnorm_fp32=False)
            >>> loss_cb = LossMonitor()
            >>> model.train(1, dataset, callbacks=loss_cb, sink_size=4, dataset_sink_mode=True)
        """
        optim_name = type(optimizer).__name__
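        # Only wrap with ModelThor when a THOR optimizer is in use; any other
        # optimizer falls through and the original model is returned unchanged.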
        if optim_name in ("ThorAscend", "ThorGpu"):
            from .model_thor import ModelThor
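            # A TrainOneStepCell already bundles network, loss and optimizer,
            # so ModelThor can wrap it directly.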
            if isinstance(network, nn.TrainOneStepCell):
                model = ModelThor(network=network)
            else:
                model = ModelThor(network=network, loss_fn=loss_fn, optimizer=optimizer, amp_level=amp_level,
                                  loss_scale_manager=loss_scale_manager,
                                  keep_batchnorm_fp32=keep_batchnorm_fp32, metrics=metrics)

        return model