//===- FakeQuantSupport.h - Support utilities for FakeQuant ops -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines support utilities for interoperating with FakeQuant* based
// QAT (Quantization Aware Training) computations, as implemented by TFLite.
// Note that FakeQuant* operators mix multiple concerns specific to how TFLite
// originally implemented quantization. As such, utilities here enforce
// opinions taken by that codebase (vs providing any amount of genericity).
//
// Specifically, a FakeQuant op combines the following concerns, each of which
// would be an independent variable in a more generic setup:
//   - numBits and isSigned imply storage data type (uint8, int8, int16)
//   - numBits < 8 is promoted to uint8 or int8
//   - "narrow_range" narrows the lower bound of the storage type's range by 1
//   - the specified min/max values are "nudged" so that the result has a zero
//     that can be exactly expressed
//   - min=max=0 implies scale=0 and zero_point=0
//
// With the above assumptions applied, every conforming FakeQuant op can be
// represented by a UniformQuantizedType. This scheme is not expected to be
// generalized further in the future and should be considered to be a legacy
// set of rules.
//
// As canonically used in TensorFlow graphs, the presence of a FakeQuant node
// is a hint that the specific math represented here has been simulated at
// training time. As such, it is usually not advised to arbitrarily change
// quantization parameters derived from FakeQuant.
34 // 35 //===----------------------------------------------------------------------===// 36 37 #ifndef MLIR_DIALECT_QUANT_FAKEQUANTSUPPORT_H_ 38 #define MLIR_DIALECT_QUANT_FAKEQUANTSUPPORT_H_ 39 40 #include "mlir/Dialect/Quant/QuantTypes.h" 41 42 namespace mlir { 43 namespace quant { 44 45 /// Converts per-layer FakeQuant attributes to the corresponding type. 46 /// In the event that the parameters cannot be converted, returns a nullptr 47 /// convertible Type and issues an appropriate error. 48 /// Note that there are multiple variants of a per-layer FakeQuant op, so 49 /// this function takes the attributes discretely vs taking a reference to the 50 /// originating op. 51 UniformQuantizedType fakeQuantAttrsToType(Location loc, unsigned numBits, 52 double rmin, double rmax, 53 bool narrowRange, Type expressedType, 54 bool isSigned = false); 55 56 /// Converts per-channel FakeQuant attributes to the corresponding type. 57 /// In the event that the parameters cannot be converted, returns a nullptr 58 /// convertible Type and issues an appropriate error. 59 UniformQuantizedPerAxisType 60 fakeQuantAttrsToType(Location loc, unsigned numBits, int32_t quantizedDimension, 61 ArrayRef<double> rmins, ArrayRef<double> rmax, 62 bool narrowRange, Type expressedType, 63 bool isSigned = false); 64 } // namespace quant 65 } // namespace mlir 66 67 #endif // MLIR_DIALECT_QUANT_FAKEQUANTSUPPORT_H_ 68