1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
// This transformation pass converts dense tensors to sparse format.
17
18 #include "absl/memory/memory.h"
19 #include "third_party/eigen3/Eigen/Core"
20 #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project
21 #include "mlir/IR/Attributes.h" // from @llvm-project
22 #include "mlir/IR/Builders.h" // from @llvm-project
23 #include "mlir/IR/BuiltinTypes.h" // from @llvm-project
24 #include "mlir/Pass/Pass.h" // from @llvm-project
25 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
26 #include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"
27
28 //===----------------------------------------------------------------------===//
29 // The DenseToSparse Pass.
30 //
31 namespace mlir {
32 namespace TFL {
33
34 namespace {
// If sparsity level is below this threshold, keep the tensor in dense format.
constexpr float kMinSparsityLevel = 0.3;
// Heuristic to check if a block configuration is correct: the blocked
// sparsity must retain at least this fraction of the random sparsity.
constexpr float kBlockOverRandomSparsityRatio = 0.9;
39
APFloatToEigenHalf(const APFloat & val)40 Eigen::half APFloatToEigenHalf(const APFloat& val) {
41 uint16_t raw_data = val.bitcastToAPInt().getZExtValue();
42 return Eigen::numext::bit_cast<Eigen::half>(raw_data);
43 }
44
EigenHalfToAPFloat(const Eigen::half & val)45 APFloat EigenHalfToAPFloat(const Eigen::half& val) {
46 uint16_t raw_data = Eigen::numext::bit_cast<uint16_t>(val);
47 return APFloat(APFloat::IEEEhalf(), APInt(16, raw_data));
48 }
49
PopulateEncodingParams(const std::vector<int> & block_size,std::vector<int> * traversal_order,std::vector<TfLiteDimensionType> * format,std::vector<int> * b_map,std::vector<int> * b_size)50 void PopulateEncodingParams(const std::vector<int>& block_size,
51 std::vector<int>* traversal_order,
52 std::vector<TfLiteDimensionType>* format,
53 std::vector<int>* b_map, std::vector<int>* b_size) {
54 const int dims_count = block_size.size();
55 traversal_order->resize(dims_count);
56 format->resize(dims_count);
57 for (int i = 0; i < dims_count; i++) {
58 (*traversal_order)[i] = i;
59 }
60 for (int i = 0; i < dims_count - 1; i++) {
61 (*format)[i] = kTfLiteDimDense;
62 }
63 (*format)[dims_count - 1] = kTfLiteDimSparseCSR;
64 *b_map = {};
65 *b_size = {};
66 int block_rank = 0;
67 for (int i = 0; i < dims_count; i++) {
68 if (block_size[i] != 1) {
69 traversal_order->push_back(block_rank + dims_count);
70 format->push_back(kTfLiteDimDense);
71 block_rank++;
72 b_map->push_back(i);
73 b_size->push_back(block_size[i]);
74 }
75 }
76 }
77
// Returns the fraction of zero-valued elements, in [0, 1].
inline float GetSparsity(const int num_zeros, const int num_elements) {
  // Guard against division by zero for degenerate (empty) tensors.
  if (num_elements == 0) return 0.0f;
  return (1.0 * num_zeros / num_elements);
}
81
CalculateRandomSparsity(const ElementsAttr & attr,const ShapedType & type)82 float CalculateRandomSparsity(const ElementsAttr& attr,
83 const ShapedType& type) {
84 int num_elements = type.getNumElements();
85 int num_zeros = 0;
86
87 if (type.getElementType().isa<FloatType>()) {
88 for (const auto val : attr.getValues<APFloat>()) {
89 if (val.isZero()) {
90 num_zeros++;
91 }
92 }
93 } else if (type.getElementType().isa<quant::QuantizedType>()) {
94 for (const auto val : attr.getValues<int8_t>()) {
95 if (val == 0) {
96 num_zeros++;
97 }
98 }
99 }
100
101 return GetSparsity(num_zeros, num_elements);
102 }
103
// Returns the sparsity level obtained when `attr` is encoded with the given
// `block_size` configuration, by running the TFLite dense-to-sparse format
// converter and comparing the compressed element count against the dense one.
// NOTE(review): only dims 0 and 1 of `type` are read here, so this appears to
// assume a rank-2 weight — confirm behavior for rank-4 (conv) callers.
float CalculateBlockSparsity(const ElementsAttr& attr, const ShapedType& type,
                             const std::vector<int>& block_size) {
  float sparsity = 0;
  std::vector<int> shape(2);
  shape[0] = type.getDimSize(0);
  shape[1] = type.getDimSize(1);

  std::vector<int> traversal_order = {};
  std::vector<TfLiteDimensionType> format = {};
  std::vector<int> b_size = {};
  std::vector<int> b_map = {};
  PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
                         &b_size);

  if (type.getElementType().isF32()) {
    // f32: feed the raw float values straight through the converter.
    tflite::optimize::sparsity::FormatConverter<float> format_converter(
        shape, traversal_order, format, b_size, b_map);
    std::vector<float> data;
    data.reserve(type.getNumElements());
    for (const auto val : attr.getValues<float>()) data.push_back(val);
    format_converter.DenseToSparse(data.data());
    // Sparsity = fraction of elements dropped by the sparse encoding.
    sparsity =
        GetSparsity(type.getNumElements() - format_converter.GetData().size(),
                    type.getNumElements());
  } else if (type.getElementType().isF16()) {
    // f16: convert the APFloat storage to Eigen::half before encoding.
    tflite::optimize::sparsity::FormatConverter<Eigen::half> format_converter(
        shape, traversal_order, format, b_size, b_map);
    std::vector<Eigen::half> data;
    data.reserve(type.getNumElements());
    for (const auto& val : attr.getValues<APFloat>())
      data.push_back(APFloatToEigenHalf(val));
    format_converter.DenseToSparse(data.data());
    sparsity =
        GetSparsity(type.getNumElements() - format_converter.GetData().size(),
                    type.getNumElements());
  } else if (type.getElementType().isa<quant::QuantizedType>()) {
    // Quantized: values are stored as int8.
    tflite::optimize::sparsity::FormatConverter<int8_t> format_converter(
        shape, traversal_order, format, b_size, b_map);
    std::vector<int8_t> data;
    data.reserve(type.getNumElements());
    for (const auto val : attr.getValues<int8_t>()) data.push_back(val);
    format_converter.DenseToSparse(data.data());
    sparsity =
        GetSparsity(type.getNumElements() - format_converter.GetData().size(),
                    type.getNumElements());
  }
  // Any other element type falls through with sparsity == 0.

  return sparsity;
}
153
// Result of inspecting a weight tensor for sparsifiability. Members carry
// default initializers so a plain `InspectResult r;` is well-defined.
struct InspectResult {
  // Whether the weight tensor is sparse enough to be compressed.
  bool can_compress = false;
  // If the weight tensor cannot be encoded in a block configuration that the
  // op supports, a Densify() op will be inserted afterwards to fall back to
  // dense execution.
  bool needs_densify = false;
  // Among the supported block configs of an op, which got selected to encode
  // the sparse weight. Empty means no block config matched.
  std::vector<int> selected_block_size;
};
165
InspectWeight(Operation * inst,const std::vector<std::vector<int>> & supported_block_size)166 InspectResult InspectWeight(
167 Operation* inst,
168 const std::vector<std::vector<int>>& supported_block_size) {
169 ElementsAttr attr;
170 ShapedType type;
171 InspectResult result = {};
172 if (auto cst = dyn_cast<ConstOp>(inst)) {
173 attr = cst.value();
174 type = cst.getType().cast<ShapedType>();
175 } else if (auto cst = dyn_cast<QConstOp>(inst)) {
176 attr = cst.value();
177 type = cst.getType().cast<ShapedType>();
178 } else {
179 result.can_compress = false;
180 return result;
181 }
182
183 // Currently we only support compressing weights of ops:
184 // Conv, DepthwiseConv, TransposeConv, whose filter has rank 4, and
185 // FullyConnected, whose filter has rank 2.
186 if (type.getRank() != 2 && type.getRank() != 4) {
187 result.can_compress = false;
188 return result;
189 }
190
191 float random_sparsity = CalculateRandomSparsity(attr, type);
192 if (random_sparsity < kMinSparsityLevel) {
193 result.can_compress = false;
194 return result;
195 }
196
197 result.can_compress = true;
198
199 float curr_sparsity = 0;
200 std::vector<int> selected_block_size;
201 result.needs_densify = true;
202 for (const auto& block_size : supported_block_size) {
203 curr_sparsity = CalculateBlockSparsity(attr, type, block_size);
204 if (curr_sparsity / random_sparsity > kBlockOverRandomSparsityRatio) {
205 selected_block_size = block_size;
206 result.can_compress = true;
207 result.needs_densify = false;
208 result.selected_block_size = selected_block_size;
209 break;
210 }
211 }
212
213 return result;
214 }
215
// Encodes the dense weight `dense_buffer` (whose shape comes from the
// constant op `inst`) in the TFLite sparse format using `block_size`, builds
// the corresponding SparsityParameterAttr into `s_param`, and returns the
// compressed values.
template <typename T>
std::vector<T> BuildSparsityParameterAttribute(
    const std::vector<int>& block_size, const T* dense_buffer, Operation* inst,
    OpBuilder* builder, SparsityParameterAttr* s_param) {
  ElementsAttr attr;
  ShapedType type;
  if (auto cst = dyn_cast<ConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else if (auto cst = dyn_cast<QConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else {
    // NOTE(review): under NDEBUG this assert is a no-op and execution falls
    // through with attr/type default-constructed — callers must only pass
    // ConstOp/QConstOp here.
    assert(false && "Expected a constant-like op");
  }
  const int dims_count = type.getRank();
  std::vector<int> shape(dims_count);
  for (int i = 0; i < dims_count; i++) {
    shape[i] = type.getDimSize(i);
  }

  std::vector<int> traversal_order = {};
  std::vector<TfLiteDimensionType> format = {};
  std::vector<int> b_size = {};
  std::vector<int> b_map = {};
  PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
                         &b_size);

  tflite::optimize::sparsity::FormatConverter<T> format_converter(
      shape, traversal_order, format, b_size, b_map);
  format_converter.DenseToSparse(dense_buffer);
  // The converter emits two metadata arrays per dimension (segments and
  // indices), hence the size() / 2 and the 2*i / 2*i+1 indexing below.
  const auto& metadata = format_converter.GetDimMetadata();
  const auto& compressed_data = format_converter.GetData();
  const int dim_size = metadata.size() / 2;
  std::vector<Attribute> dim_metadata(traversal_order.size());
  for (int i = 0; i < dim_size; i++) {
    if (format[i] == kTfLiteDimDense) {
      // Dense dims only record their size; segment/index arrays stay empty.
      dim_metadata[i] = DimensionMetadataAttr::get(
          builder->getStringAttr("DENSE"),
          builder->getI32IntegerAttr(metadata[2 * i][0]),
          builder->getArrayAttr({}), builder->getArrayAttr({}),
          builder->getContext());
    } else {
      // CSR dims record the segment and index arrays produced by the
      // converter.
      dim_metadata[i] = DimensionMetadataAttr::get(
          builder->getStringAttr("SPARSE_CSR"), builder->getI32IntegerAttr(0),
          builder->getI32ArrayAttr(metadata[2 * i]),
          builder->getI32ArrayAttr(metadata[2 * i + 1]), builder->getContext());
    }
  }
  *s_param = SparsityParameterAttr::get(
      builder->getI32ArrayAttr(traversal_order),
      builder->getI32ArrayAttr(b_map), builder->getArrayAttr(dim_metadata),
      builder->getContext());

  return compressed_data;
}
272
273 // This pass encodes sparse weights in the model in the proper format, and adds
274 // Densify() op if necessary. The general algorithm is:
275 // 1. Get list of operands (weights) of an op that can be sparse.
276 // 2. Get list of supported block configurations of the op.
277 // 3. Calculate random sparsity of the weight.
278 // 3.1. If sparsity level is below the encoding threshold, keep in dense.
279 // 3.2. If sparsity level is above the encoding threshold, go to 4.
280 // 4. Try to encode the weight with supported block configurations. If the
281 // weight was pruned with the same block config, the blocked sparsity level
282 // should match the random sparsity.
283 // 4.1. Return the matching block config if found.
284 // 4.2. If no matching block config is found, encode the weight with random
285 // sparsity, and add Densify() op to fall back to dense execution.
struct DenseToSparse : public PassWrapper<DenseToSparse, FunctionPass> {
  // Walks all SparseOpInterface ops in the function and encodes eligible
  // weights; implementation below.
  void runOnFunction() override;

  StringRef getArgument() const final {
    // This is the argument used to refer to the pass in
    // the textual format (on the commandline for example).
    return "tfl-dense-to-sparse";
  }
  StringRef getDescription() const final {
    // This is a brief description of the pass.
    return "Convert dense tensor to sparse format.";
  }
};
299
// Walks every op implementing SparseOpInterface and, for each sparsifiable
// weight operand, replaces the defining constant with a
// SparseConstOp/SparseQConstOp when profitable, inserting a Densify() op when
// the consuming op cannot execute the chosen encoding directly.
void DenseToSparse::runOnFunction() {
  FuncOp func = getFunction();
  OpBuilder builder(func);

  func.walk([&](SparseOpInterface sparse_op) {
    const auto& sparse_operands = sparse_op.GetSparseOperands();
    std::vector<std::vector<int>> supported_block_size;
    for (int operand : sparse_operands) {
      auto* op = sparse_op.getOperation();
      auto value = op->getOperand(operand);

      auto* inst = value.getDefiningOp();
      if (!inst) {
        // Block argument — no defining constant to sparsify.
        continue;
      }

      // There could be a Dequantize op after the weight tensor in cases like
      // fp16 post-training quantization. We need to get the weight from the
      // input of the Dequantize op.
      if (isa<DequantizeOp>(inst)) {
        op = inst;
        value = inst->getOperand(0);
        inst = value.getDefiningOp();
        if (!inst) {
          continue;
        }
        operand = 0;
      }

      // Pick the block configs supported for float vs. quantized weights.
      ShapedType type;
      if (isa<ConstOp>(inst)) {
        supported_block_size = sparse_op.GetFloatBlockSize();
        type = dyn_cast<ConstOp>(inst).getType().cast<ShapedType>();
      } else if (isa<QConstOp>(inst)) {
        supported_block_size = sparse_op.GetQuantizedBlockSize();
        type = dyn_cast<QConstOp>(inst).getType().cast<ShapedType>();
      } else {
        continue;
      }

      InspectResult result = InspectWeight(inst, supported_block_size);
      if (!result.can_compress) {
        continue;
      }

      // The weight is not block sparse. Encode with random sparsity.
      if (result.selected_block_size.empty()) {
        result.selected_block_size = std::vector<int>(type.getRank(), 1);
      }

      builder.setInsertionPoint(op);
      SparsityParameterAttr s_param;
      if (auto cst = dyn_cast<ConstOp>(inst)) {
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        if (type.getElementType().isF32()) {
          // f32 weight: compress directly.
          std::vector<float> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto val : attr.getValues<float>())
            dense_data.push_back(val);
          std::vector<float> compressed_data =
              BuildSparsityParameterAttribute<float>(result.selected_block_size,
                                                     dense_data.data(), inst,
                                                     &builder, &s_param);
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              builder.getF32Type());
          auto new_value = DenseElementsAttr::get<float>(compressed_data_type,
                                                         compressed_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        } else if (type.getElementType().isF16()) {
          // f16 weight: round-trip APFloat <-> Eigen::half around the
          // compressor, then rebuild an APFloat-backed attribute.
          std::vector<Eigen::half> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto& val : attr.getValues<APFloat>())
            dense_data.push_back(APFloatToEigenHalf(val));
          std::vector<Eigen::half> compressed_data =
              BuildSparsityParameterAttribute<Eigen::half>(
                  result.selected_block_size, dense_data.data(), inst, &builder,
                  &s_param);
          std::vector<APFloat> apfloat_data;
          apfloat_data.reserve(type.getNumElements());
          for (const auto& val : compressed_data)
            apfloat_data.push_back(EigenHalfToAPFloat(val));
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              type.getElementType());
          auto new_value =
              DenseElementsAttr::get(compressed_data_type, apfloat_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        }
      } else if (auto cst = dyn_cast<QConstOp>(inst)) {
        // Quantized (int8) weight.
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        std::vector<int8_t> dense_data;
        dense_data.reserve(type.getNumElements());
        for (const auto& val : attr.getValues<int8_t>())
          dense_data.push_back(val);
        std::vector<int8_t> compressed_data =
            BuildSparsityParameterAttribute<int8_t>(result.selected_block_size,
                                                    dense_data.data(), inst,
                                                    &builder, &s_param);
        auto compressed_data_type = RankedTensorType::get(
            {static_cast<int64_t>(compressed_data.size())},
            builder.getIntegerType(8, true));
        auto new_value = DenseElementsAttr::get<int8_t>(compressed_data_type,
                                                        compressed_data);
        auto s_qconst = builder.create<SparseQConstOp>(
            op->getLoc(), cst.qtypeAttr(), cst.value(), s_param, new_value);
        value.replaceAllUsesWith(s_qconst.getResult());
        cst.erase();
      }

      // No supported block config matched: insert a Densify() so the op still
      // consumes a dense tensor at runtime.
      if (result.needs_densify) {
        const auto value = op->getOperand(operand);
        auto densify =
            builder.create<DensifyOp>(op->getLoc(), value.getType(), value);
        value.replaceAllUsesWith(densify);
        densify.setOperand(value);
      }
    }
  });
}
428
429 } // namespace
430
431 // Creates an instance of the TensorFlow Lite dialect DenseToSparse pass.
CreateDenseToSparsePass()432 std::unique_ptr<OperationPass<FuncOp>> CreateDenseToSparsePass() {
433 return absl::make_unique<DenseToSparse>();
434 }
435
// Registers the pass with the global MLIR pass registry so it can be invoked
// via its "tfl-dense-to-sparse" command-line argument.
static PassRegistration<DenseToSparse> pass;
437
438 } // namespace TFL
439 } // namespace mlir
440