# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
15"""Export for quantization."""
16
17import copy
18
19import numpy as np
20
21from ... import nn, ops
22from ..._checkparam import Validator
23from ...common import Tensor
24from ...common import dtype as mstype
25from ...common.api import _cell_graph_executor as _executor
26from ...common.parameter import Parameter
27from ...nn import Cell
28from ...nn.layer import quant
29from ...ops import operations as P
30from ...ops import functional as F
31from ...ops.operations import _inner_ops as inner
32from ..quant import quant_utils
33from ..quant.qat import _AddFakeQuantInput, _AddFakeQuantAfterSubCell
34
35
36__all__ = ["ExportToQuantInferNetwork"]
37
38
class QuantBlock(Cell):
    r"""
    A quantized block of a Conv/Dense layer with an optional activation, for Ascend deployment.

    Computes Conv or Dense in int8, wrapped with Quant and Dequant operations.

    Note:
        This block is for deployment only and is not trainable.

    Args:
        core_op (Primitive): The Conv or MatMul operation to wrap.
        weight (Tensor): The quantized weight of the layer.
        quant_op (Primitive): The Quant operation applied to the input.
        dequant_op (Primitive): The Dequant operation applied to the output.
        dequant_scale (Tensor): The fused dequantization scale.
        bias (Tensor): The bias of the layer. Default: None.
        activation (Cell): The activation applied to the output of the layer, e.g. nn.ReLU.
            Default: None.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.

    Outputs:
        Tensor of shape :math:`(N, out\_channels)`.
    """

    def __init__(self,
                 core_op,
                 weight,
                 quant_op,
                 dequant_op,
                 dequant_scale,
                 bias=None,
                 activation=None):
        super(QuantBlock, self).__init__()
        self.core_op = core_op
        self.weight = weight
        self.quant = quant_op
        self.dequant = dequant_op
        self.dequant_scale = dequant_scale
        self.bias = bias
        self.has_bias = bias is not None
        self.activation = activation
        self.has_act = activation is not None
        self.bias_add = P.BiasAdd()
        self.sub = P.Sub()
        self.weight_offset = Parameter(np.zeros(1, dtype=np.int8), name='weight_offset')

    def construct(self, x):
        # Quantize the float input to int8.
        x = self.quant(x)
        if self.has_bias:
            weight = self.sub(self.weight, self.weight_offset)
            x = self.core_op(x, weight)
            x = self.bias_add(x, self.bias)
        else:
            x = self.core_op(x, self.weight)
        # Rescale the integer accumulation back to float.
        x = self.dequant(x, self.dequant_scale)
        x = F.cast(x, mstype.float32)
        if self.has_act:
            x = self.activation(x)
        return x
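
    # A rough numeric sketch of the pipeline above (hedged; the exact rounding
    # behaviour lives inside the Quant/Dequant kernels): with an input scale of
    # 0.01 and a zero point of 0, Quant maps x = 0.5 to round(0.5 / 0.01) = 50
    # in int8, core_op accumulates in int32, and Dequant rescales that result
    # back to float using dequant_scale.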

    def extend_repr(self):
        s = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
        if self.has_bias:
            s += f', bias=shape[{self.bias.shape}]'
        if self.has_act:
            s += f', activation={self.activation}'
        s += f', dequant={self.dequant}'
        return s


class QuantMindirBlock(Cell):
    """A quantized block of a Conv/Dense layer with an optional activation, for exporting a MindIR model.

    Args:
        core_op (Primitive): The Conv or MatMul operation to wrap.
        weight (Tensor): The weight of the cell.
        bias (Tensor): The bias of the cell. Default: None.
        activation (Cell): The activation applied to the output of the layer, e.g. nn.ReLU. Default: None.
        param_dict (dict): The quantization parameters of the cell. Default: None.
    """

    def __init__(self,
                 core_op,
                 weight,
                 bias=None,
                 activation=None,
                 param_dict=None):

        super(QuantMindirBlock, self).__init__()
        self.core_op = core_op
        if activation is not None:
            self.core_op.add_prim_attr("activation_name", activation.__class__.__name__)
        self.core_op.add_prim_attr("filter_maxq", Tensor(param_dict["filter_maxq"]))
        self.core_op.add_prim_attr("filter_minq", Tensor(param_dict["filter_minq"]))
        if param_dict["output_maxq"] is not None:
            self.core_op.add_prim_attr("output_maxq", Tensor(param_dict["output_maxq"]))
            self.core_op.add_prim_attr("output_minq", Tensor(param_dict["output_minq"]))
        self.core_op.add_prim_attr("symmetric", Tensor(param_dict["symmetric"]))
        if hasattr(core_op, 'pad_mode'):
            self.core_op.add_prim_attr("pad_mode", core_op.pad_mode)
        self.core_op.add_prim_attr("act_num_bits", Tensor(8))
        self.core_op.add_prim_attr("weight_num_bits", Tensor(param_dict["weight_num_bits"]))
        self.core_op.add_prim_attr("weight_narrow_range", Tensor(param_dict["weight_narrow_range"]))
        if param_dict["input_narrow_range"] is not None:
            self.core_op.add_prim_attr("input_narrow_range", Tensor(param_dict["input_narrow_range"]))
        if param_dict["output_narrow_range"] is not None:
            self.core_op.add_prim_attr("output_narrow_range", Tensor(param_dict["output_narrow_range"]))
        # The string sentinel 'None' marks the first layer, whose input quant
        # parameters come from the preprocessing mean/std rather than from a
        # fake-quant cell (see __get_quant_param below).
        if param_dict["input_maxq"] == 'None':
            self.core_op.add_prim_attr("mean", Tensor(param_dict["mean"]))
            self.core_op.add_prim_attr("std_dev", Tensor(param_dict["std_dev"]))
        elif param_dict["input_maxq"] is not None:
            self.core_op.add_prim_attr("input_maxq", Tensor(param_dict["input_maxq"]))
            self.core_op.add_prim_attr("input_minq", Tensor(param_dict["input_minq"]))

        self.weight = weight
        self.bias = bias
        self.has_bias = bias is not None
        self.activation = activation
        self.has_act = activation is not None
        self.bias_add = P.BiasAdd()

    def construct(self, x):
        if self.has_bias:
            x = self.core_op(x, self.weight)
            x = self.bias_add(x, self.bias)
        else:
            x = self.core_op(x, self.weight)
        if self.has_act:
            x = self.activation(x)
        return x

    def extend_repr(self):
        s = f'core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
        if self.has_bias:
            s += f', bias=shape[{self.bias.shape}]'
        if self.has_act:
            s += f', activation={self.activation}'
        return s
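
    # Design note (a sketch of the intent, not normative): unlike QuantBlock,
    # this block still computes in float; the quantization parameters stamped
    # onto core_op above travel as primitive attributes so that a MindIR
    # consumer (for example, an offline converter) can re-quantize the layer
    # later.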


class ExportToQuantInferNetwork:
    """
    Convert a quantization aware network to an inference network.

    Args:
        network (Cell): MindSpore quantization aware training network.
        mean (int, float): The mean of input data after preprocessing, used for quantizing the first layer
            of the network.
        std_dev (int, float): The standard deviation of input data after preprocessing, used for quantizing
            the first layer of the network.
        inputs (Tensor): Input tensors of the quantization aware training network.
        is_mindir (bool): Whether to export in MindIR format. Default: False.

    Returns:
        Cell, the inference network.
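
    Examples:
        >>> # A minimal usage sketch; ``QuantNet`` stands in for any user-defined
        >>> # quantization aware network, and one dummy input suffices to trace it.
        >>> net = QuantNet()
        >>> inputs = Tensor(np.ones([1, 1, 32, 32]), mstype.float32)
        >>> exporter = ExportToQuantInferNetwork(net, 127.5, 127.5, inputs, is_mindir=True)
        >>> quant_net = exporter.run()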
    """

    def __init__(self, network, mean, std_dev, *inputs, is_mindir=False):
        network = Validator.check_isinstance('network', network, (nn.Cell,))
        self.data_type = mstype.int8
        self.network = copy.deepcopy(network)
        self.network_bk = copy.deepcopy(network)
        self.get_inputs_table(inputs)
        self.mean = mean
        self.std_dev = std_dev
        self.is_mindir = is_mindir
        self.upcell = None

    def get_inputs_table(self, inputs):
        """Get the input quantization parameters of quantization cells for quant export."""
        phase_name = 'export_quant'
        graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False)
        # As consumed in __get_quant_param, the fetched table maps a weight
        # fake-quant parameter name to a pair whose second element is either
        # the matching input fake-quant parameter name or the literal 'input'
        # for the first layer.
        self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id)

    def run(self):
        """Start the conversion."""
        self.network.update_cell_prefix()
        network = self.network
        if isinstance(network, _AddFakeQuantInput):
            network = network.network
        network = self._convert_quant2deploy(network)
        return network

    def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
        """Convert a quant subcell of the network to a deploy subcell."""
        scale_a_in, zp_a_in, scale_w, zp_w, param_dict = self.__get_quant_param(cell_core, fake_quant_a_out)

        # Build the `Quant` and `Dequant` ops.
        # `Quant` only supports the per-layer version, which needs to be checked here.
        quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in))
        scale_deq = self.__get_dequant_scale(scale_a_in, scale_w)
        dequant_op = inner.Dequant()

        if isinstance(activation, _AddFakeQuantAfterSubCell):
            activation = activation.subcell
        elif hasattr(activation, "get_origin"):
            activation = activation.get_origin()

        # Get the core op.
        if isinstance(cell_core, quant.DenseQuant):
            op_core = P.MatMul()
        else:
            op_core = cell_core.conv

        # Get the `weight` and `bias`.
        weight, bias, weight_b, bias_b = self.__get_weight_bias(cell_core, scale_a_in, scale_w, zp_w)

        if self.is_mindir:
            block = QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict)
        else:
            block = QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation)
        return block

    def _get_input_quant_param(self, minq_name, np_type, param_dict):
        """Get the input quant parameters for a quant block."""
        # Strip the trailing '.minq' to recover the owning cell's name prefix.
        fake_quant_a_in_prefix = minq_name[:-5]
        fake_quant_a_in = None
        cells = self.network_bk.cells_and_names()
        for cell in cells:
            if cell[0].endswith(fake_quant_a_in_prefix):
                fake_quant_a_in = cell[1]
                break
        if fake_quant_a_in is None:
            raise ValueError(f"Cannot find the input fake-quant cell for `{minq_name}`.")
        scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_in, np_type)
        param_dict["input_narrow_range"] = fake_quant_a_in.narrow_range
        return scale_a_in, zp_a_in

    def __get_quant_param(self, cell_core, fake_quant_a_out):
        """Get the parameters for a quant block."""
        w_minq_name = cell_core.fake_quant_weight.minq.name
        w_maxq_name = cell_core.fake_quant_weight.maxq.name
        np_type = mstype.dtype_to_nptype(self.data_type)
        param_dict = dict()
        param_dict["filter_maxq"] = None
        param_dict["filter_minq"] = None
        param_dict["output_maxq"] = None
        param_dict["output_minq"] = None
        param_dict["input_maxq"] = None
        param_dict["input_minq"] = None
        param_dict["input_narrow_range"] = None
        param_dict["output_narrow_range"] = None
        param_dict["weight_narrow_range"] = cell_core.fake_quant_weight.narrow_range
        param_dict["mean"] = self.mean
        param_dict["std_dev"] = self.std_dev
        param_dict["symmetric"] = cell_core.fake_quant_weight.symmetric
        param_dict["weight_num_bits"] = cell_core.fake_quant_weight.num_bits

        scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(cell_core.fake_quant_weight, np_type)
        if fake_quant_a_out is not None:
            _, _, param_dict["output_maxq"], param_dict["output_minq"] = \
                quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type)
            param_dict["output_narrow_range"] = fake_quant_a_out.narrow_range

        info = self.quant_info_table.get(w_minq_name, None)
        if not info:
            info = self.quant_info_table.get(w_maxq_name, None)
        if info:
            _, minq_name = info
            if minq_name == 'input':
                # First layer: derive the quant parameters from the preprocessing
                # mean/std and mark input_maxq/input_minq with the sentinel 'None'.
                scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
                    (1 / self.std_dev), round(self.mean), 'None', 'None'
            else:
                scale_a_in, zp_a_in = self._get_input_quant_param(minq_name, np_type, param_dict)
        else:
            # The layer is not quantized; skip it.
            scale_a_in, zp_a_in = 1.0, 0.0
        return scale_a_in, zp_a_in, scale_w, zp_w, param_dict

    @staticmethod
    def __get_dequant_scale(scale_a_in, scale_w):
        """Get the dequant scale."""
        scale_deq = scale_a_in * scale_w

        # Fuse the parameters into one uint64 word per channel:
        # |--------|47:40|--------|39:32|--------|31:0|
        #         offset_w [8]    shift_N [8]    deq_scale [32]
        float32_deq_scale = scale_deq.astype(np.float32)
        uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32)
        scale_length = scale_deq.size  # channel
        dequant_param = np.zeros(scale_length, dtype=np.uint64)
        for index in range(scale_length):
            dequant_param[index] += uint32_deq_scale[index]
        scale_deq = Tensor(dequant_param, mstype.uint64)
        return scale_deq
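
    # A worked sketch of the fusion above for a single-channel scale: with
    # scale_deq = np.array([0.5], np.float32), the raw IEEE-754 bits are
    # 0x3F000000, so the packed word is the uint64 0x000000003F000000; the
    # offset_w and shift_N fields stay zero because only deq_scale is populated
    # by the loop above.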

    def __get_weight_bias(self, cell_core, scale_a_in, scale_w, zp_w):
        """Get the weight and bias for quantization."""
        np_type = mstype.dtype_to_nptype(self.data_type)
        weight = cell_core.weight.data.asnumpy()
        bias = None
        if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
            if cell_core.has_bias:
                bias = cell_core.bias.data.asnumpy()
        elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)):
            weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
        elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
            weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
        weight_b = weight
        bias_b = bias
        # Apply the quantization.
        quant_min, quant_max = quant_utils.get_quant_min_max(np_type,
                                                             cell_core.fake_quant_weight.num_bits,
                                                             cell_core.fake_quant_weight.narrow_range)
        weight = quant_utils.weight2int(weight, scale_w, zp_w, quant_min, quant_max)
        if bias is not None:
            bias = Tensor(bias / scale_a_in / scale_w, mstype.int32)

        if isinstance(cell_core, quant.DenseQuant):
            weight = np.transpose(weight)
            weight_b = np.transpose(weight_b)

        weight = Tensor(weight, self.data_type)
        weight_b = Tensor(weight_b)
        if bias_b is not None:
            bias_b = Tensor(bias_b, mstype.float32)
        return weight, bias, weight_b, bias_b
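
    # Rough arithmetic behind the conversion above (hedged; the exact rounding
    # and clipping live in quant_utils.weight2int): with scale_w = 0.02 and a
    # zero point of 0, a float weight of 0.5 becomes round(0.5 / 0.02) = 25 in
    # int8, and a float bias is rescaled into the int32 accumulation domain by
    # dividing by scale_a_in * scale_w.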

    def _add_output_min_max_for_op(self, origin_op, fake_quant_cell):
        """Add output quant info to a quant op when exporting MindIR."""
        if self.is_mindir:
            if isinstance(origin_op, ops.Primitive) and not hasattr(origin_op, 'output_minq'):
                np_type = mstype.dtype_to_nptype(self.data_type)
                _, _, maxq, minq = quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_cell, np_type)
                origin_op.add_prim_attr('output_maxq', Tensor(maxq))
                origin_op.add_prim_attr('output_minq', Tensor(minq))

    def _convert_subcell(self, network, change, name, subcell):
        """Convert a quant subcell to a deploy subcell."""
        if subcell is not None and hasattr(subcell, "fake_quant_weight"):
            new_subcell = self._get_quant_block(subcell, None, None)
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            self.upcell = new_subcell
            network.insert_child_to_cell(name, new_subcell)
            change = True
        return network, change

    def _convert_conv(self, network, change, name, subcell):
        """Convert a conv subcell to a deploy subcell."""
        cell_core = subcell.conv
        activation = subcell.activation
        fake_quant_act = None
        if hasattr(activation, 'fake_quant_act_before'):
            fake_quant_act = activation.fake_quant_act_before
        elif hasattr(activation, 'fake_quant_act'):
            fake_quant_act = activation.fake_quant_act
        if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
            new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
            self.upcell = None
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            network.insert_child_to_cell(name, new_subcell)
            change = True
        return network, change

    def _convert_dense(self, network, change, name, subcell):
        """Convert a dense subcell to a deploy subcell."""
        cell_core = subcell.dense
        activation = subcell.activation
        fake_quant_act = None
        if hasattr(activation, 'fake_quant_act_before'):
            fake_quant_act = activation.fake_quant_act_before
        elif hasattr(activation, 'fake_quant_act'):
            fake_quant_act = activation.fake_quant_act
        if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
            new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            network.insert_child_to_cell(name, new_subcell)
            self.upcell = None
            change = True
        return network, change

    def _convert_act(self, subcell):
        """Convert an activation subcell to a deploy subcell."""
        activation = subcell.get_origin()
        if isinstance(activation, nn.ReLU):
            self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act)
        elif isinstance(activation, nn.ReLU6):
            self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act)
        if self.upcell:
            self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act)
        return activation

    def _convert_add(self, subcell):
        """Convert an add subcell to a deploy subcell."""
        if isinstance(subcell.add, _AddFakeQuantAfterSubCell):
            add_op = subcell.add.subcell
            subcell.__delattr__("add")
            subcell.__setattr__("add", add_op)
        add_op = subcell.add
        self._add_output_min_max_for_op(add_op, subcell.fake_quant_act)
        subcell.__delattr__("fake_quant_act")
        subcell.__setattr__("fake_quant_act", P.identity())

    def _convert_observer(self, network, name, subcell):
        """Convert a FakeQuantWithMinMaxObserver subcell to a deploy subcell."""
        if self.upcell:
            self._add_output_min_max_for_op(self.upcell.core_op, subcell)
        network.__delattr__(name)
        network.__setattr__(name, P.identity())

    def _convert_fake_quant_after_cell(self, network, name, subcell):
        """Convert an _AddFakeQuantAfterSubCell subcell to a deploy subcell."""
        op = subcell.subcell
        self._add_output_min_max_for_op(op, subcell.fake_quant_act)
        network.__delattr__(name)
        network.__setattr__(name, op)

    def _convert_core_quant_subcell(self, network, change, name, subcell):
        """Convert a conv or dense subcell to a deploy subcell."""
        is_core_subcell = True
        if isinstance(subcell, nn.Conv2dBnAct):
            network, change = self._convert_conv(network, change, name, subcell)
        elif isinstance(subcell, nn.DenseBnAct):
            network, change = self._convert_dense(network, change, name, subcell)
        elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv,
                                  quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)):
            network, change = self._convert_subcell(network, change, name, subcell)
        else:
            is_core_subcell = False
        return is_core_subcell, network, change

    def _convert_other_quant_subcell(self, network, change, name, subcell):
        """Convert a quant subcell other than conv or dense to a deploy subcell."""
        is_other_subcell = True
        if isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"):
            activation = self._convert_act(subcell)
            network.insert_child_to_cell(name, activation)
            change = True
        elif isinstance(subcell, nn.TensorAddQuant):
            self._convert_add(subcell)
        elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver):
            self._convert_observer(network, name, subcell)
        elif isinstance(subcell, _AddFakeQuantAfterSubCell):
            self._convert_fake_quant_after_cell(network, name, subcell)
            change = True
        else:
            is_other_subcell = False
        return is_other_subcell, network, change

    def _convert_quant2deploy(self, network):
        """Convert all quant subcells of the network to deploy subcells."""
        cells = network.name_cells()
        change = False
        for name in cells:
            subcell = cells[name]
            if subcell == network:
                continue
            is_core_quant_subcell, network, change = self._convert_core_quant_subcell(network, change, name, subcell)
            is_other_quant_subcell, network, change = self._convert_other_quant_subcell(network, change, name, subcell)
            if not is_core_quant_subcell and not is_other_quant_subcell:
                self.upcell = None
                self._convert_quant2deploy(subcell)
        if isinstance(network, nn.SequentialCell) and change:
            network.cell_list = list(network.cells())
        return network