# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Export for quantization."""

import copy

import numpy as np

from ... import nn, ops
from ..._checkparam import Validator
from ...common import Tensor
from ...common import dtype as mstype
from ...common.api import _cell_graph_executor as _executor
from ...common.parameter import Parameter
from ...nn import Cell
from ...nn.layer import quant
from ...ops import operations as P
from ...ops import functional as F
from ...ops.operations import _inner_ops as inner
from ..quant import quant_utils
from ..quant.qat import _AddFakeQuantInput, _AddFakeQuantAfterSubCell


__all__ = ["ExportToQuantInferNetwork"]


class QuantBlock(Cell):
    r"""
    A quant block of Conv/Dense, activation layer for Ascend deploy.

    Calculate Conv or Dense in Int8, with Quant and DeQuant.

    Notes:
        This block is only for deploy, and not trainable.

    Args:
        core_op (Primitive): The operator invoked as `core_op(x, weight)` on the quantized input.
        weight (Tensor): The weight passed to `core_op`.
        quant_op (Primitive): The operator applied to the input before `core_op`.
        dequant_op (Primitive): The operator invoked as `dequant_op(x, dequant_scale)` after `core_op`.
        dequant_scale (Tensor): The second argument handed to `dequant_op`.
        bias (Tensor): The bias added with `BiasAdd` after `core_op`. Default: None.
        activation (Cell): The callable applied to the float32 output. Default: None.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.

    Outputs:
        Tensor of shape :math:`(N, out\_channels)`.
    """

    def __init__(self,
                 core_op,
                 weight,
                 quant_op,
                 dequant_op,
                 dequant_scale,
                 bias=None,
                 activation=None):
        super(QuantBlock, self).__init__()
        self.core_op = core_op
        self.weight = weight
        self.quant = quant_op
        self.dequant = dequant_op
        self.dequant_scale = dequant_scale
        self.bias = bias
        self.has_bias = bias is not None
        self.activation = activation
        self.has_act = activation is not None
        self.bias_add = P.BiasAdd()
        self.sub = P.Sub()
        self.weight_offset = Parameter(np.zeros(1, dtype=np.int8), name='weight_offset')

    def construct(self, x):
        # Pipeline: quant -> core op (+ bias) -> dequant -> cast to float32 -> optional activation.
        x = self.quant(x)
        if self.has_bias:
            # NOTE(review): `weight_offset` is only subtracted on the bias path; the no-bias path
            # uses the raw weight. Confirm this asymmetry is intended.
            weight = self.sub(self.weight, self.weight_offset)
            x = self.core_op(x, weight)
            x = self.bias_add(x, self.bias)
        else:
            x = self.core_op(x, self.weight)
        x = self.dequant(x, self.dequant_scale)
        x = F.cast(x, mstype.float32)
        if self.has_act:
            x = self.activation(x)
        return x

    def extend_repr(self):
        """Return the extra representation string describing this block."""
        s = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
        if self.has_bias:
            s += f', bias=shape[{self.bias.shape}]'
        if self.has_act:
            s += f', activation={self.activation}'
        s += f', dequant={self.dequant}'
        return s


class QuantMindirBlock(Cell):
    """A quant binary block of Conv/Dense, activation layer for export MINDIR model.

    The quantization information found in `param_dict` is attached to `core_op` as primitive attributes.

    Args:
        core_op (Primitive): The operator invoked as `core_op(x, weight)`; it receives the quant attributes.
        weight (Tensor): The weight of the cell.
        bias (Tensor): The bias of the cell. Default: None.
        activation (Cell): The callable applied to the output of the layer. Its class name is recorded
            on `core_op` as `activation_name`. Default: None.
        param_dict (dict): The quantization information of the cell. Although it defaults to None,
            it is required.

    Raises:
        TypeError: If `param_dict` is None.
    """

    def __init__(self,
                 core_op,
                 weight,
                 bias=None,
                 activation=None,
                 param_dict=None):

        super(QuantMindirBlock, self).__init__()
        if param_dict is None:
            # Fail before touching `core_op` instead of on the first subscript below.
            raise TypeError("For 'QuantMindirBlock', 'param_dict' must be a dict, but got None.")
        self.core_op = core_op
        add_attr = self.core_op.add_prim_attr
        if activation is not None:
            add_attr("activation_name", activation.__class__.__name__)
        add_attr("filter_maxq", Tensor(param_dict["filter_maxq"]))
        add_attr("filter_minq", Tensor(param_dict["filter_minq"]))
        if param_dict["output_maxq"] is not None:
            add_attr("output_maxq", Tensor(param_dict["output_maxq"]))
            add_attr("output_minq", Tensor(param_dict["output_minq"]))
        add_attr("symmetric", Tensor(param_dict["symmetric"]))
        if hasattr(core_op, 'pad_mode'):
            add_attr("pad_mode", core_op.pad_mode)
        add_attr("act_num_bits", Tensor(8))
        add_attr("weight_num_bits", Tensor(param_dict["weight_num_bits"]))
        add_attr("weight_narrow_range", Tensor(param_dict["weight_narrow_range"]))
        if param_dict["input_narrow_range"] is not None:
            add_attr("input_narrow_range", Tensor(param_dict["input_narrow_range"]))
        if param_dict["output_narrow_range"] is not None:
            add_attr("output_narrow_range", Tensor(param_dict["output_narrow_range"]))

        input_maxq = param_dict["input_maxq"]
        # The string 'None' is the sentinel used for a block fed directly by the network input.
        # Check the type first so that an array-valued `input_maxq` is never compared with a string.
        if isinstance(input_maxq, str) and input_maxq == 'None':
            add_attr("mean", Tensor(param_dict["mean"]))
            add_attr("std_dev", Tensor(param_dict["std_dev"]))
        elif input_maxq is not None:
            add_attr("input_maxq", Tensor(input_maxq))
            add_attr("input_minq", Tensor(param_dict["input_minq"]))

        self.weight = weight
        self.bias = bias
        self.has_bias = bias is not None
        self.activation = activation
        self.has_act = activation is not None
        self.bias_add = P.BiasAdd()

    def construct(self, x):
        # Pipeline: core op -> optional bias add -> optional activation.
        x = self.core_op(x, self.weight)
        if self.has_bias:
            x = self.bias_add(x, self.bias)
        if self.has_act:
            x = self.activation(x)
        return x

    def extend_repr(self):
        """Return the extra representation string describing this block."""
        s = f'core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
        if self.has_bias:
            s += f', bias=shape[{self.bias.shape}]'
        if self.has_act:
            s += f', activation={self.activation}'
        return s


class ExportToQuantInferNetwork:
    """
    Convert quantization aware network to infer network.

    Args:
        network (Cell): MindSpore quantization aware training network.
        mean (int, float): The mean of input data after preprocessing, used for quantizing the first layer of
            network.
        std_dev (int, float): The standard deviation of input data after preprocessing; `1 / std_dev` is used
            as the input scale when quantizing the first layer of network.
        inputs (Tensor): Input tensors of the `quantization aware training network`.
        is_mindir (bool): Whether export MINDIR format. Default: False.

    Returns:
        Cell, Infer network.
    """

    def __init__(self, network, mean, std_dev, *inputs, is_mindir=False):
        network = Validator.check_isinstance('network', network, (nn.Cell,))
        self.data_type = mstype.int8
        # `network` is the copy that gets converted; `network_bk` is a second copy that is only
        # searched for the input fake quant cells.
        self.network = copy.deepcopy(network)
        self.network_bk = copy.deepcopy(network)
        self.get_inputs_table(inputs)
        self.mean = mean
        self.std_dev = std_dev
        self.is_mindir = is_mindir
        # The block most recently built by `_convert_subcell`; reset to None by the other conversions.
        self.upcell = None

    def get_inputs_table(self, inputs):
        """Get the input quantization parameters of quantization cell for quant export."""
        phase_name = 'export_quant'
        graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False)
        self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id)

    def run(self):
        """Start to convert and return the converted network."""
        self.network.update_cell_prefix()
        network = self.network
        if isinstance(network, _AddFakeQuantInput):
            # Unwrap the input fake quant wrapper and convert the wrapped network.
            network = network.network
        return self._convert_quant2deploy(network)

    def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
        """Convert network's quant subcell to deploy subcell."""
        scale_a_in, zp_a_in, scale_w, zp_w, param_dict = self.__get_quant_param(cell_core, fake_quant_a_out)

        # Build the `Quant` `Dequant` op.
        # Quant only support perlayer version. Need check here.
        quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in))
        scale_deq = self.__get_dequant_scale(scale_a_in, scale_w)
        dequant_op = inner.Dequant()

        # Strip the quantization wrapper from the activation, if any.
        if isinstance(activation, _AddFakeQuantAfterSubCell):
            activation = activation.subcell
        elif hasattr(activation, "get_origin"):
            activation = activation.get_origin()

        # get op
        if isinstance(cell_core, quant.DenseQuant):
            op_core = P.MatMul()
        else:
            op_core = cell_core.conv

        # get the `weight` and `bias`
        weight, bias, weight_b, bias_b = self.__get_weight_bias(cell_core, scale_a_in, scale_w, zp_w)

        if self.is_mindir:
            return QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict)
        return QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation)

    def _get_input_quant_param(self, minq_name, np_type, param_dict):
        """
        Get input quant parameter for quant block.

        Fills `input_maxq`, `input_minq` and `input_narrow_range` of `param_dict` and returns the
        input scale and zero point.

        Raises:
            ValueError: If no cell of the backup network matches `minq_name`.
        """
        # Drop the last five characters of the parameter name (presumably the '.minq' suffix).
        fake_quant_a_in_prefix = minq_name[:-5]
        fake_quant_a_in = next(
            (cell for cell_name, cell in self.network_bk.cells_and_names()
             if cell_name.endswith(fake_quant_a_in_prefix)),
            None)
        if fake_quant_a_in is None:
            raise ValueError(f"Can not find the input fake quant cell for '{minq_name}' in the network.")
        scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_in, np_type)
        param_dict["input_narrow_range"] = fake_quant_a_in.narrow_range
        return scale_a_in, zp_a_in

    def __get_quant_param(self, cell_core, fake_quant_a_out):
        """
        Get parameter for quant block.

        Returns:
            tuple, `(scale_a_in, zp_a_in, scale_w, zp_w, param_dict)`.
        """
        fake_quant_weight = cell_core.fake_quant_weight
        w_minq_name = fake_quant_weight.minq.name
        w_maxq_name = fake_quant_weight.maxq.name
        np_type = mstype.dtype_to_nptype(self.data_type)
        param_dict = {
            "filter_maxq": None,
            "filter_minq": None,
            "output_maxq": None,
            "output_minq": None,
            "input_maxq": None,
            "input_minq": None,
            "input_narrow_range": None,
            "output_narrow_range": None,
            "weight_narrow_range": fake_quant_weight.narrow_range,
            "mean": self.mean,
            "std_dev": self.std_dev,
            "symmetric": fake_quant_weight.symmetric,
            "weight_num_bits": fake_quant_weight.num_bits,
        }

        scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_weight, np_type)
        if fake_quant_a_out is not None:
            _, _, param_dict["output_maxq"], param_dict["output_minq"] = \
                quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type)
            param_dict["output_narrow_range"] = fake_quant_a_out.narrow_range

        # Look the cell up by its weight minq name first, then by its maxq name.
        info = self.quant_info_table.get(w_minq_name, None) or self.quant_info_table.get(w_maxq_name, None)
        if info:
            _, minq_name = info
            if minq_name == 'input':
                # Fed by the network input: derive the parameters from mean/std_dev and mark the
                # input range with the 'None' string sentinel.
                scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
                    (1 / self.std_dev), round(self.mean), 'None', 'None'
            else:
                scale_a_in, zp_a_in = self._get_input_quant_param(minq_name, np_type, param_dict)
        else:
            # skip quant layer
            scale_a_in, zp_a_in = 1.0, 0.0
        return scale_a_in, zp_a_in, scale_w, zp_w, param_dict

    @staticmethod
    def __get_dequant_scale(scale_a_in, scale_w):
        """Get dequant scale as a uint64 Tensor holding the float32 bits of `scale_a_in * scale_w`."""
        scale_deq = scale_a_in * scale_w

        # fuse parameter
        # |--------|47:40|--------|39:32|--------|31:0|
        #         offset_w [8]    shift_N [8]    deq_scale [32]
        float32_deq_scale = scale_deq.astype(np.float32)
        # Reinterpret the float32 bits as uint32, then widen; only bits 31:0 are populated here.
        uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32)
        dequant_param = uint32_deq_scale.astype(np.uint64)
        return Tensor(dequant_param, mstype.uint64)

    def __get_weight_bias(self, cell_core, scale_a_in, scale_w, zp_w):
        """
        Get weight and bias for quantization.

        Returns:
            tuple, `(weight, bias, weight_b, bias_b)`, where `weight`/`bias` are the quantized values and
            `weight_b`/`bias_b` are the values before quantization. Both bias entries are None when the
            cell has no bias.
        """
        np_type = mstype.dtype_to_nptype(self.data_type)
        weight = cell_core.weight.data.asnumpy()
        bias = None
        if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
            if cell_core.has_bias:
                bias = cell_core.bias.data.asnumpy()
        elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)):
            weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
        elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
            weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
        # Keep the values before quantization for the MINDIR block.
        weight_b = weight
        bias_b = bias
        # apply the quant
        quant_min, quant_max = quant_utils.get_quant_min_max(np_type,
                                                             cell_core.fake_quant_weight.num_bits,
                                                             cell_core.fake_quant_weight.narrow_range)
        weight = quant_utils.weight2int(weight, scale_w, zp_w, quant_min, quant_max)
        if bias is not None:
            bias = Tensor(bias / scale_a_in / scale_w, mstype.int32)

        if isinstance(cell_core, quant.DenseQuant):
            weight = np.transpose(weight)
            weight_b = np.transpose(weight_b)

        weight = Tensor(weight, self.data_type)
        weight_b = Tensor(weight_b)
        if bias_b is not None:
            bias_b = Tensor(bias_b, mstype.float32)
        return weight, bias, weight_b, bias_b

    def _add_output_min_max_for_op(self, origin_op, fake_quant_cell):
        """Add output quant info for quant op for export mindir."""
        if not self.is_mindir:
            return
        # Only primitives that do not carry an output range yet are annotated.
        if not isinstance(origin_op, ops.Primitive) or hasattr(origin_op, 'output_minq'):
            return
        np_type = mstype.dtype_to_nptype(self.data_type)
        _, _, maxq, minq = quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_cell, np_type)
        origin_op.add_prim_attr('output_maxq', Tensor(maxq))
        origin_op.add_prim_attr('output_minq', Tensor(minq))

    @staticmethod
    def _get_fake_quant_act(activation):
        """Return the fake quant cell attached to `activation`, or None if it has none."""
        if hasattr(activation, 'fake_quant_act_before'):
            return activation.fake_quant_act_before
        if hasattr(activation, 'fake_quant_act'):
            return activation.fake_quant_act
        return None

    def _convert_subcell(self, network, change, name, subcell):
        """Convert subcell to quant subcell."""
        if subcell is not None and hasattr(subcell, "fake_quant_weight"):
            new_subcell = self._get_quant_block(subcell, None, None)
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            # Remember the block so a following activation/observer can attach its output range.
            self.upcell = new_subcell
            network.insert_child_to_cell(name, new_subcell)
            change = True
        return network, change

    def _convert_conv(self, network, change, name, subcell):
        """Convert subcell to quant subcell for conv."""
        cell_core = subcell.conv
        activation = subcell.activation
        fake_quant_act = self._get_fake_quant_act(activation)
        if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
            new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
            self.upcell = None
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            network.insert_child_to_cell(name, new_subcell)
            change = True
        return network, change

    def _convert_dense(self, network, change, name, subcell):
        """Convert subcell to quant subcell for dense."""
        cell_core = subcell.dense
        activation = subcell.activation
        fake_quant_act = self._get_fake_quant_act(activation)
        if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
            new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            network.insert_child_to_cell(name, new_subcell)
            self.upcell = None
            change = True
        return network, change

    def _convert_act(self, subcell):
        """Convert subcell to quant subcell for activation and return the origin activation."""
        activation = subcell.get_origin()
        if isinstance(activation, nn.ReLU):
            self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act)
        elif isinstance(activation, nn.ReLU6):
            self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act)
        if self.upcell:
            self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act)
        return activation

    def _convert_add(self, subcell):
        """Convert subcell to quant subcell for add."""
        if isinstance(subcell.add, _AddFakeQuantAfterSubCell):
            # Replace the wrapper with the wrapped add operator.
            add_op = subcell.add.subcell
            delattr(subcell, "add")
            setattr(subcell, "add", add_op)
        add_op = subcell.add
        self._add_output_min_max_for_op(add_op, subcell.fake_quant_act)
        # The fake quant cell is no longer needed once its range is recorded on the operator.
        delattr(subcell, "fake_quant_act")
        setattr(subcell, "fake_quant_act", P.identity())

    def _convert_observer(self, network, name, subcell):
        """Convert subcell to quant subcell for FakeQuantWithMinMaxObserver."""
        if self.upcell:
            self._add_output_min_max_for_op(self.upcell.core_op, subcell)
        delattr(network, name)
        setattr(network, name, P.identity())

    def _convert_fake_quant_after_cell(self, network, name, subcell):
        """Convert subcell to quant subcell for _AddFakeQuantAfterSubCell."""
        op = subcell.subcell
        self._add_output_min_max_for_op(op, subcell.fake_quant_act)
        delattr(network, name)
        setattr(network, name, op)

    def _convert_core_quant_subcell(self, network, change, name, subcell):
        """Convert subcell to quant subcell for conv and dense."""
        is_core_subcell = True
        if isinstance(subcell, nn.Conv2dBnAct):
            network, change = self._convert_conv(network, change, name, subcell)
        elif isinstance(subcell, nn.DenseBnAct):
            network, change = self._convert_dense(network, change, name, subcell)
        elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv,
                                  quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)):
            network, change = self._convert_subcell(network, change, name, subcell)
        else:
            is_core_subcell = False
        return is_core_subcell, network, change

    def _convert_other_quant_subcell(self, network, change, name, subcell):
        """Convert subcell to quant subcell for cell except conv and dense."""
        is_other_subcell = True
        if isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"):
            activation = self._convert_act(subcell)
            network.insert_child_to_cell(name, activation)
            change = True
        elif isinstance(subcell, nn.TensorAddQuant):
            self._convert_add(subcell)
        elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver):
            self._convert_observer(network, name, subcell)
        elif isinstance(subcell, _AddFakeQuantAfterSubCell):
            self._convert_fake_quant_after_cell(network, name, subcell)
            change = True
        else:
            is_other_subcell = False
        return is_other_subcell, network, change

    def _convert_quant2deploy(self, network):
        """Convert network's all quant subcell to deploy subcell."""
        cells = network.name_cells()
        change = False
        for name, subcell in cells.items():
            if subcell == network:
                continue
            is_core_quant_subcell, network, change = self._convert_core_quant_subcell(network, change, name, subcell)
            is_other_quant_subcell, network, change = self._convert_other_quant_subcell(network, change, name, subcell)
            if not is_core_quant_subcell and not is_other_quant_subcell:
                # Not a quant cell itself: forget the previous block and recurse into its children.
                self.upcell = None
                self._convert_quant2deploy(subcell)
        if isinstance(network, nn.SequentialCell) and change:
            # Refresh the cached cell list so it reflects the replaced children.
            network.cell_list = list(network.cells())
        return network