compression/export/quant_export.py

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Export for quantization."""

import copy

import numpy as np

from ... import nn, ops
from ..._checkparam import Validator
from ...common import Tensor
from ...common import dtype as mstype
from ...common.api import _cell_graph_executor as _executor
from ...common.parameter import Parameter
from ...nn import Cell
from ...nn.layer import quant
from ...ops import operations as P
from ...ops import functional as F
from ...ops.operations import _inner_ops as inner
from ..quant import quant_utils
from ..quant.qat import _AddFakeQuantInput, _AddFakeQuantAfterSubCell


__all__ = ["ExportToQuantInferNetwork"]


class QuantBlock(Cell):
    r"""
    A quant block of Conv/Dense, activation layer for Ascend deploy.

    Calculate Conv or Dense in Int8, with Quant and DeQuant.

    Notes:
        This block is only for deploy, and not trainable.

    Args:
        in_channels (int): The number of channels in the input space.
        out_channels (int): The number of channels in the output space.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
            is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
            same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
        activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
        batchnorm (bool): Specifies to used batchnorm or not. Default: None.
        activation (string): Specifies activation type. The optional values are as following:
            'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid',
            'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.

    Outputs:
        Tensor of shape :math:`(N, out\_channels)`.
    """

    def __init__(self,
                 core_op,
                 weight,
                 quant_op,
                 dequant_op,
                 dequant_scale,
                 bias=None,
                 activation=None):
        super(QuantBlock, self).__init__()
        self.core_op = core_op
        self.weight = weight
        self.quant = quant_op
        self.dequant = dequant_op
        self.dequant_scale = dequant_scale
        self.bias = bias
        self.has_bias = bias is not None
        self.activation = activation
        self.has_act = activation is not None
        self.bias_add = P.BiasAdd()
        self.sub = P.Sub()
        self.weight_offset = Parameter(np.zeros(1, dtype=np.int8), name='weight_offset')

    def construct(self, x):
        x = self.quant(x)
        if self.has_bias:
            weight = self.sub(self.weight, self.weight_offset)
            x = self.core_op(x, weight)
            x = self.bias_add(x, self.bias)
        else:
            x = self.core_op(x, self.weight)
        x = self.dequant(x, self.dequant_scale)
        x = F.cast(x, mstype.float32)
        if self.has_act:
            x = self.activation(x)
        return x

    def extend_repr(self):
        s = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
        if self.has_bias:
            s += f', bias=shape[{self.bias.shape}]'
        if self.has_act:
            s += f', activation={self.activation}'
        s += f', dequant={self.dequant}'
        return s


class QuantMindirBlock(Cell):
    """A quant binary block of Conv/Dense, activation layer for export MINDIR model.

       Args:
        core_op (Cell): The operation cell.
        weight (Tensor): The weight of the cell.
        bias (Tensor): The bias of the cell. Default: None.
        activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
        param_dict (dict): The information of the cell.
    """

    def __init__(self,
                 core_op,
                 weight,
                 bias=None,
                 activation=None,
                 param_dict=None):

        super(QuantMindirBlock, self).__init__()
        self.core_op = core_op
        if activation is not None:
            self.core_op.add_prim_attr("activation_name", activation.__class__.__name__)
        self.core_op.add_prim_attr("filter_maxq", Tensor(param_dict["filter_maxq"]))
        self.core_op.add_prim_attr("filter_minq", Tensor(param_dict["filter_minq"]))
        if param_dict["output_maxq"] is not None:
            self.core_op.add_prim_attr("output_maxq", Tensor(param_dict["output_maxq"]))
            self.core_op.add_prim_attr("output_minq", Tensor(param_dict["output_minq"]))
        self.core_op.add_prim_attr("symmetric", Tensor(param_dict["symmetric"]))
        if hasattr(core_op, 'pad_mode'):
            self.core_op.add_prim_attr("pad_mode", core_op.pad_mode)
        self.core_op.add_prim_attr("act_num_bits", Tensor(8))
        self.core_op.add_prim_attr("weight_num_bits", Tensor(param_dict["weight_num_bits"]))
        self.core_op.add_prim_attr("weight_narrow_range", Tensor(param_dict["weight_narrow_range"]))
        if param_dict["input_narrow_range"] is not None:
            self.core_op.add_prim_attr("input_narrow_range", Tensor(param_dict["input_narrow_range"]))
        if param_dict["output_narrow_range"] is not None:
            self.core_op.add_prim_attr("output_narrow_range", Tensor(param_dict["output_narrow_range"]))
        if param_dict["input_maxq"] == 'None':
            self.core_op.add_prim_attr("mean", Tensor(param_dict["mean"]))
            self.core_op.add_prim_attr("std_dev", Tensor(param_dict["std_dev"]))
        elif param_dict["input_maxq"] is not None:
            self.core_op.add_prim_attr("input_maxq", Tensor(param_dict["input_maxq"]))
            self.core_op.add_prim_attr("input_minq", Tensor(param_dict["input_minq"]))

        self.weight = weight
        self.bias = bias
        self.has_bias = bias is not None
        self.activation = activation
        self.has_act = activation is not None
        self.bias_add = P.BiasAdd()

    def construct(self, x):
        if self.has_bias:
            x = self.core_op(x, self.weight)
            x = self.bias_add(x, self.bias)
        else:
            x = self.core_op(x, self.weight)
        if self.has_act:
            x = self.activation(x)
        return x

    def extend_repr(self):
        s = f'core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
        if self.has_bias:
            s += f', bias=shape[{self.bias.shape}]'
        if self.has_act:
            s += f', activation={self.activation}'
        return s


class ExportToQuantInferNetwork:
    """
    Convert quantization aware network to infer network.

    Args:
        network (Cell): MindSpore quantization aware training network.
        inputs (Tensor): Input tensors of the `quantization aware training network`.
        mean (int, float): The mean of input data after preprocessing, used for quantizing the first layer of network.
          Default: 127.5.
        std_dev (int, float): The variance of input data after preprocessing, used for quantizing the first layer
          of network. Default: 127.5.
        is_mindir (bool): Whether export MINDIR format. Default: False.

    Returns:
        Cell, Infer network.
    """

    def __init__(self, network, mean, std_dev, *inputs, is_mindir=False):
        network = Validator.check_isinstance('network', network, (nn.Cell,))
        self.data_type = mstype.int8
        self.network = copy.deepcopy(network)
        self.network_bk = copy.deepcopy(network)
        self.get_inputs_table(inputs)
        self.mean = mean
        self.std_dev = std_dev
        self.is_mindir = is_mindir
        self.upcell = None

    def get_inputs_table(self, inputs):
        """Get the input quantization parameters of quantization cell for quant export."""
        phase_name = 'export_quant'
        graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False)
        self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id)

    def run(self):
        """Start to convert."""
        self.network.update_cell_prefix()
        network = self.network
        if isinstance(network, _AddFakeQuantInput):
            network = network.network
        network = self._convert_quant2deploy(network)
        return network

    def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
        """convert network's quant subcell to deploy subcell"""
        scale_a_in, zp_a_in, scale_w, zp_w, param_dict = self.__get_quant_param(cell_core, fake_quant_a_out)

        # Build the `Quant` `Dequant` op.
        # Quant only support perlayer version. Need check here.
        quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in))
        scale_deq = self.__get_dequant_scale(scale_a_in, scale_w)
        dequant_op = inner.Dequant()

        if isinstance(activation, _AddFakeQuantAfterSubCell):
            activation = activation.subcell
        elif hasattr(activation, "get_origin"):
            activation = activation.get_origin()

        # get op
        if isinstance(cell_core, quant.DenseQuant):
            op_core = P.MatMul()
        else:
            op_core = cell_core.conv

        # get the `weight` and `bias`
        weight, bias, weight_b, bias_b = self.__get_weight_bias(cell_core, scale_a_in, scale_w, zp_w)

        if self.is_mindir:
            block = QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict)
        else:
            block = QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation)
        return block

    def _get_input_quant_param(self, minq_name, np_type, param_dict):
        """get input quant parameter for quant block"""
        fake_quant_a_in_prefix = minq_name[:-5]
        cells = self.network_bk.cells_and_names()
        for cell in cells:
            if cell[0].endswith(fake_quant_a_in_prefix):
                fake_quant_a_in = cell[1]
                break
        scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_in, np_type)
        param_dict["input_narrow_range"] = fake_quant_a_in.narrow_range
        return scale_a_in, zp_a_in

    def __get_quant_param(self, cell_core, fake_quant_a_out):
        """get parameter for quant block"""
        w_minq_name = cell_core.fake_quant_weight.minq.name
        w_maxq_name = cell_core.fake_quant_weight.maxq.name
        np_type = mstype.dtype_to_nptype(self.data_type)
        param_dict = dict()
        param_dict["filter_maxq"] = None
        param_dict["filter_minq"] = None
        param_dict["output_maxq"] = None
        param_dict["output_minq"] = None
        param_dict["input_maxq"] = None
        param_dict["input_minq"] = None
        param_dict["input_narrow_range"] = None
        param_dict["output_narrow_range"] = None
        param_dict["weight_narrow_range"] = cell_core.fake_quant_weight.narrow_range
        param_dict["mean"] = self.mean
        param_dict["std_dev"] = self.std_dev
        param_dict["symmetric"] = cell_core.fake_quant_weight.symmetric
        param_dict["weight_num_bits"] = cell_core.fake_quant_weight.num_bits

        scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(cell_core.fake_quant_weight, np_type)
        if fake_quant_a_out is not None:
            _, _, param_dict["output_maxq"], param_dict["output_minq"] = \
                quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type)
            param_dict["output_narrow_range"] = fake_quant_a_out.narrow_range

        info = self.quant_info_table.get(w_minq_name, None)
        if not info:
            info = self.quant_info_table.get(w_maxq_name, None)
        if info:
            _, minq_name = info
            if minq_name == 'input':
                scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
                    (1 / self.std_dev), round(self.mean), 'None', 'None'
            else:
                scale_a_in, zp_a_in = self._get_input_quant_param(minq_name, np_type, param_dict)
        else:
            # skip quant layer
            scale_a_in, zp_a_in = 1.0, 0.0
        return scale_a_in, zp_a_in, scale_w, zp_w, param_dict

    @staticmethod
    def __get_dequant_scale(scale_a_in, scale_w):
        """Get dequant scale"""
        scale_deq = scale_a_in * scale_w

        # fuse parameter
        # |--------|47:40|--------|39:32|--------|31:0|
        #         offset_w [8]    shift_N [8]    deq_scale [32]
        float32_deq_scale = scale_deq.astype(np.float32)
        uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32)
        scale_length = scale_deq.size  # channel
        dequant_param = np.zeros(scale_length, dtype=np.uint64)
        for index in range(scale_length):
            dequant_param[index] += uint32_deq_scale[index]
        scale_deq = Tensor(dequant_param, mstype.uint64)
        return scale_deq

    def __get_weight_bias(self, cell_core, scale_a_in, scale_w, zp_w):
        """Get weight and bias for quantizaiton"""
        np_type = mstype.dtype_to_nptype(self.data_type)
        weight = cell_core.weight.data.asnumpy()
        bias = None
        if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
            if cell_core.has_bias:
                bias = cell_core.bias.data.asnumpy()
        elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)):
            weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
        elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
            weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
        weight_b = weight
        bias_b = bias
        # apply the quant
        quant_min, quant_max = quant_utils.get_quant_min_max(np_type,
                                                             cell_core.fake_quant_weight.num_bits,
                                                             cell_core.fake_quant_weight.narrow_range)
        weight = quant_utils.weight2int(weight, scale_w, zp_w, quant_min, quant_max)
        if bias is not None:
            bias = Tensor(bias / scale_a_in / scale_w, mstype.int32)

        if isinstance(cell_core, quant.DenseQuant):
            weight = np.transpose(weight)
            weight_b = np.transpose(weight_b)

        weight = Tensor(weight, self.data_type)
        weight_b = Tensor(weight_b)
        if bias_b is not None:
            bias_b = Tensor(bias_b, mstype.float32)
        return weight, bias, weight_b, bias_b

    def _add_output_min_max_for_op(self, origin_op, fake_quant_cell):
        """add output quant info for quant op for export mindir."""
        if self.is_mindir:
            if isinstance(origin_op, ops.Primitive) and not hasattr(origin_op, 'output_minq'):
                np_type = mstype.dtype_to_nptype(self.data_type)
                _, _, maxq, minq = quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_cell, np_type)
                origin_op.add_prim_attr('output_maxq', Tensor(maxq))
                origin_op.add_prim_attr('output_minq', Tensor(minq))

    def _convert_subcell(self, network, change, name, subcell):
        """Convert subcell to ant subcell."""
        if subcell is not None and hasattr(subcell, "fake_quant_weight"):
            new_subcell = self._get_quant_block(subcell, None, None)
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            self.upcell = new_subcell
            network.insert_child_to_cell(name, new_subcell)
            change = True
        return network, change

    def _convert_conv(self, network, change, name, subcell):
        """Convert subcell to ant subcell for conv."""
        cell_core = subcell.conv
        activation = subcell.activation
        fake_quant_act = None
        if hasattr(activation, 'fake_quant_act_before'):
            fake_quant_act = activation.fake_quant_act_before
        elif hasattr(activation, 'fake_quant_act'):
            fake_quant_act = activation.fake_quant_act
        if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
            new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
            self.upcell = None
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            network.insert_child_to_cell(name, new_subcell)
            change = True
        return network, change

    def _convert_dense(self, network, change, name, subcell):
        """Convert subcell to ant subcell for dense."""
        cell_core = subcell.dense
        activation = subcell.activation
        fake_quant_act = None
        if hasattr(activation, 'fake_quant_act_before'):
            fake_quant_act = activation.fake_quant_act_before
        elif hasattr(activation, 'fake_quant_act'):
            fake_quant_act = activation.fake_quant_act
        if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
            new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            network.insert_child_to_cell(name, new_subcell)
            self.upcell = None
            change = True
        return network, change

    def _convert_act(self, subcell):
        """Convert subcell to ant subcell for activation."""
        activation = subcell.get_origin()
        if isinstance(activation, nn.ReLU):
            self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act)
        elif isinstance(activation, nn.ReLU6):
            self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act)
        if self.upcell:
            self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act)
        return activation

    def _convert_add(self, subcell):
        """Convert subcell to ant subcell for add."""
        if isinstance(subcell.add, _AddFakeQuantAfterSubCell):
            add_op = subcell.add.subcell
            subcell.__delattr__("add")
            subcell.__setattr__("add", add_op)
        add_op = subcell.add
        self._add_output_min_max_for_op(add_op, subcell.fake_quant_act)
        subcell.__delattr__("fake_quant_act")
        subcell.__setattr__("fake_quant_act", P.identity())

    def _convert_observer(self, network, name, subcell):
        """Convert subcell to ant subcell for FakeQuantWithMinMaxObserver."""
        if self.upcell:
            self._add_output_min_max_for_op(self.upcell.core_op, subcell)
        network.__delattr__(name)
        network.__setattr__(name, P.identity())

    def _convert_fake_quant_after_cell(self, network, name, subcell):
        """Convert subcell to ant subcell for _AddFakeQuantAfterSubCell."""
        op = subcell.subcell
        self._add_output_min_max_for_op(op, subcell.fake_quant_act)
        network.__delattr__(name)
        network.__setattr__(name, op)

    def _convert_core_quant_subcell(self, network, change, name, subcell):
        """Convert subcell to ant subcell for conv and dense."""
        is_core_subcell = True
        if isinstance(subcell, nn.Conv2dBnAct):
            network, change = self._convert_conv(network, change, name, subcell)
        elif isinstance(subcell, nn.DenseBnAct):
            network, change = self._convert_dense(network, change, name, subcell)
        elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv,
                                  quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)):
            network, change = self._convert_subcell(network, change, name, subcell)
        else:
            is_core_subcell = False
        return is_core_subcell, network, change

    def _convert_other_quant_subcell(self, network, change, name, subcell):
        """Convert subcell to ant subcell for cell except conv and dense."""
        is_other_subcell = True
        if isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"):
            activation = self._convert_act(subcell)
            network.insert_child_to_cell(name, activation)
            change = True
        elif isinstance(subcell, nn.TensorAddQuant):
            self._convert_add(subcell)
        elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver):
            self._convert_observer(network, name, subcell)
        elif isinstance(subcell, _AddFakeQuantAfterSubCell):
            self._convert_fake_quant_after_cell(network, name, subcell)
            change = True
        else:
            is_other_subcell = False
        return is_other_subcell, network, change

    def _convert_quant2deploy(self, network):
        """Convert network's all quant subcell to deploy subcell."""
        cells = network.name_cells()
        change = False
        for name in cells:
            subcell = cells[name]
            if subcell == network:
                continue
            is_core_quant_subcell, network, change = self._convert_core_quant_subcell(network, change, name, subcell)
            is_other_quant_subcell, network, change = self._convert_other_quant_subcell(network, change, name, subcell)
            if not is_core_quant_subcell and not is_other_quant_subcell:
                self.upcell = None
                self._convert_quant2deploy(subcell)
        if isinstance(network, nn.SequentialCell) and change:
            network.cell_list = list(network.cells())
        return network