/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This transformation pass converts dense tensors to sparse format.

#include <memory>

#include "absl/memory/memory.h"
#include "third_party/eigen3/Eigen/Core"
#include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
#include "mlir/IR/Attributes.h"  // from @llvm-project
#include "mlir/IR/Builders.h"  // from @llvm-project
#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
#include "mlir/Pass/Pass.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
#include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"

28 //===----------------------------------------------------------------------===//
29 // The DenseToSparse Pass.
30 //
31 namespace mlir {
32 namespace TFL {
33 
34 namespace {
35 // If sparsity level is below this threadshold, keep the tensor in dense format.
36 const float kMinSparsityLevel = 0.3;
37 // Heuristic to check if a block configuration is correct.
38 const float kBlockOverRandomSparsityRatio = 0.9;
39 
APFloatToEigenHalf(const APFloat & val)40 Eigen::half APFloatToEigenHalf(const APFloat& val) {
41   uint16_t raw_data = val.bitcastToAPInt().getZExtValue();
42   return Eigen::numext::bit_cast<Eigen::half>(raw_data);
43 }
44 
EigenHalfToAPFloat(const Eigen::half & val)45 APFloat EigenHalfToAPFloat(const Eigen::half& val) {
46   uint16_t raw_data = Eigen::numext::bit_cast<uint16_t>(val);
47   return APFloat(APFloat::IEEEhalf(), APInt(16, raw_data));
48 }
49 
PopulateEncodingParams(const std::vector<int> & block_size,std::vector<int> * traversal_order,std::vector<TfLiteDimensionType> * format,std::vector<int> * b_map,std::vector<int> * b_size)50 void PopulateEncodingParams(const std::vector<int>& block_size,
51                             std::vector<int>* traversal_order,
52                             std::vector<TfLiteDimensionType>* format,
53                             std::vector<int>* b_map, std::vector<int>* b_size) {
54   const int dims_count = block_size.size();
55   traversal_order->resize(dims_count);
56   format->resize(dims_count);
57   for (int i = 0; i < dims_count; i++) {
58     (*traversal_order)[i] = i;
59   }
60   for (int i = 0; i < dims_count - 1; i++) {
61     (*format)[i] = kTfLiteDimDense;
62   }
63   (*format)[dims_count - 1] = kTfLiteDimSparseCSR;
64   *b_map = {};
65   *b_size = {};
66   int block_rank = 0;
67   for (int i = 0; i < dims_count; i++) {
68     if (block_size[i] != 1) {
69       traversal_order->push_back(block_rank + dims_count);
70       format->push_back(kTfLiteDimDense);
71       block_rank++;
72       b_map->push_back(i);
73       b_size->push_back(block_size[i]);
74     }
75   }
76 }
77 
// Returns the fraction of elements that are zero. Returns 0 for an empty
// tensor: the original expression divided by zero and produced NaN/inf, which
// then compared unpredictably against the sparsity thresholds.
inline float GetSparsity(const int num_zeros, const int num_elements) {
  if (num_elements == 0) return 0.0f;
  return (1.0 * num_zeros / num_elements);
}

CalculateRandomSparsity(const ElementsAttr & attr,const ShapedType & type)82 float CalculateRandomSparsity(const ElementsAttr& attr,
83                               const ShapedType& type) {
84   int num_elements = type.getNumElements();
85   int num_zeros = 0;
86 
87   if (type.getElementType().isa<FloatType>()) {
88     for (const auto val : attr.getValues<APFloat>()) {
89       if (val.isZero()) {
90         num_zeros++;
91       }
92     }
93   } else if (type.getElementType().isa<quant::QuantizedType>()) {
94     for (const auto val : attr.getValues<int8_t>()) {
95       if (val == 0) {
96         num_zeros++;
97       }
98     }
99   }
100 
101   return GetSparsity(num_zeros, num_elements);
102 }
103 
CalculateBlockSparsity(const ElementsAttr & attr,const ShapedType & type,const std::vector<int> & block_size)104 float CalculateBlockSparsity(const ElementsAttr& attr, const ShapedType& type,
105                              const std::vector<int>& block_size) {
106   float sparsity = 0;
107   std::vector<int> shape(2);
108   shape[0] = type.getDimSize(0);
109   shape[1] = type.getDimSize(1);
110 
111   std::vector<int> traversal_order = {};
112   std::vector<TfLiteDimensionType> format = {};
113   std::vector<int> b_size = {};
114   std::vector<int> b_map = {};
115   PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
116                          &b_size);
117 
118   if (type.getElementType().isF32()) {
119     tflite::optimize::sparsity::FormatConverter<float> format_converter(
120         shape, traversal_order, format, b_size, b_map);
121     std::vector<float> data;
122     data.reserve(type.getNumElements());
123     for (const auto val : attr.getValues<float>()) data.push_back(val);
124     format_converter.DenseToSparse(data.data());
125     sparsity =
126         GetSparsity(type.getNumElements() - format_converter.GetData().size(),
127                     type.getNumElements());
128   } else if (type.getElementType().isF16()) {
129     tflite::optimize::sparsity::FormatConverter<Eigen::half> format_converter(
130         shape, traversal_order, format, b_size, b_map);
131     std::vector<Eigen::half> data;
132     data.reserve(type.getNumElements());
133     for (const auto& val : attr.getValues<APFloat>())
134       data.push_back(APFloatToEigenHalf(val));
135     format_converter.DenseToSparse(data.data());
136     sparsity =
137         GetSparsity(type.getNumElements() - format_converter.GetData().size(),
138                     type.getNumElements());
139   } else if (type.getElementType().isa<quant::QuantizedType>()) {
140     tflite::optimize::sparsity::FormatConverter<int8_t> format_converter(
141         shape, traversal_order, format, b_size, b_map);
142     std::vector<int8_t> data;
143     data.reserve(type.getNumElements());
144     for (const auto val : attr.getValues<int8_t>()) data.push_back(val);
145     format_converter.DenseToSparse(data.data());
146     sparsity =
147         GetSparsity(type.getNumElements() - format_converter.GetData().size(),
148                     type.getNumElements());
149   }
150 
151   return sparsity;
152 }
153 
// Result of inspecting a weight tensor for sparse encoding. (Plain struct —
// the redundant C-style typedef was dropped; default member initializers make
// a default-constructed result safe.)
struct InspectResult {
  // Whether the weight tensor is sparse enough to be compressed.
  bool can_compress = false;
  // If the weight tensor cannot be encoded in a block configuration that the
  // op supports, a Densify() op will be inserted afterwards to fall back to
  // dense execution.
  bool needs_densify = false;
  // Among the supported block configs of an op, which got selected to encode
  // the sparse weight.
  std::vector<int> selected_block_size;
};

InspectWeight(Operation * inst,const std::vector<std::vector<int>> & supported_block_size)166 InspectResult InspectWeight(
167     Operation* inst,
168     const std::vector<std::vector<int>>& supported_block_size) {
169   ElementsAttr attr;
170   ShapedType type;
171   InspectResult result = {};
172   if (auto cst = dyn_cast<ConstOp>(inst)) {
173     attr = cst.value();
174     type = cst.getType().cast<ShapedType>();
175   } else if (auto cst = dyn_cast<QConstOp>(inst)) {
176     attr = cst.value();
177     type = cst.getType().cast<ShapedType>();
178   } else {
179     result.can_compress = false;
180     return result;
181   }
182 
183   // Currently we only support compressing weights of ops:
184   //   Conv, DepthwiseConv, TransposeConv, whose filter has rank 4, and
185   //   FullyConnected, whose filter has rank 2.
186   if (type.getRank() != 2 && type.getRank() != 4) {
187     result.can_compress = false;
188     return result;
189   }
190 
191   float random_sparsity = CalculateRandomSparsity(attr, type);
192   if (random_sparsity < kMinSparsityLevel) {
193     result.can_compress = false;
194     return result;
195   }
196 
197   result.can_compress = true;
198 
199   float curr_sparsity = 0;
200   std::vector<int> selected_block_size;
201   result.needs_densify = true;
202   for (const auto& block_size : supported_block_size) {
203     curr_sparsity = CalculateBlockSparsity(attr, type, block_size);
204     if (curr_sparsity / random_sparsity > kBlockOverRandomSparsityRatio) {
205       selected_block_size = block_size;
206       result.can_compress = true;
207       result.needs_densify = false;
208       result.selected_block_size = selected_block_size;
209       break;
210     }
211   }
212 
213   return result;
214 }
215 
// Encodes the constant weight `inst` in the TFLite sparse format with the
// given block configuration. Populates `s_param` with the
// SparsityParameterAttr describing the encoding and returns the compressed
// value buffer.
//
// `dense_buffer` must hold the tensor's values in dense (row-major) order,
// with T the element storage type (float, Eigen::half or int8_t). `inst`
// must be a ConstOp or QConstOp.
template <typename T>
std::vector<T> BuildSparsityParameterAttribute(
    const std::vector<int>& block_size, const T* dense_buffer, Operation* inst,
    OpBuilder* builder, SparsityParameterAttr* s_param) {
  ElementsAttr attr;
  ShapedType type;
  if (auto cst = dyn_cast<ConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else if (auto cst = dyn_cast<QConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else {
    // NOTE(review): in NDEBUG builds this assert compiles away and `type` is
    // used uninitialized below — callers must only pass constant ops.
    assert(false && "Expected a constant-like op");
  }
  const int dims_count = type.getRank();
  std::vector<int> shape(dims_count);
  for (int i = 0; i < dims_count; i++) {
    shape[i] = type.getDimSize(i);
  }

  std::vector<int> traversal_order = {};
  std::vector<TfLiteDimensionType> format = {};
  std::vector<int> b_size = {};
  std::vector<int> b_map = {};
  PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
                         &b_size);

  tflite::optimize::sparsity::FormatConverter<T> format_converter(
      shape, traversal_order, format, b_size, b_map);
  format_converter.DenseToSparse(dense_buffer);
  // The converter emits two metadata vectors per traversal dimension
  // (metadata[2*i] and metadata[2*i+1]); sparse dimensions use both.
  const auto& metadata = format_converter.GetDimMetadata();
  const auto& compressed_data = format_converter.GetData();
  const int dim_size = metadata.size() / 2;
  std::vector<Attribute> dim_metadata(traversal_order.size());
  for (int i = 0; i < dim_size; i++) {
    if (format[i] == kTfLiteDimDense) {
      // Dense dimension: only its size (metadata[2 * i][0]) is meaningful;
      // the segment/index arrays are left empty.
      dim_metadata[i] = DimensionMetadataAttr::get(
          builder->getStringAttr("DENSE"),
          builder->getI32IntegerAttr(metadata[2 * i][0]),
          builder->getArrayAttr({}), builder->getArrayAttr({}),
          builder->getContext());
    } else {
      // Sparse (CSR) dimension: store the segment and index arrays.
      dim_metadata[i] = DimensionMetadataAttr::get(
          builder->getStringAttr("SPARSE_CSR"), builder->getI32IntegerAttr(0),
          builder->getI32ArrayAttr(metadata[2 * i]),
          builder->getI32ArrayAttr(metadata[2 * i + 1]), builder->getContext());
    }
  }
  *s_param = SparsityParameterAttr::get(
      builder->getI32ArrayAttr(traversal_order),
      builder->getI32ArrayAttr(b_map), builder->getArrayAttr(dim_metadata),
      builder->getContext());

  return compressed_data;
}

// This pass encodes sparse weights in the model in the proper format, and adds
// Densify() op if necessary. The general algorithm is:
//   1. Get list of operands (weights) of an op that can be sparse.
//   2. Get list of supported block configurations of the op.
//   3. Calculate random sparsity of the weight.
//     3.1. If sparsity level is below the encoding threshold, keep in dense.
//     3.2. If sparsity level is above the encoding threshold, go to 4.
//   4. Try to encode the weight with supported block configurations. If the
//      weight was pruned with the same block config, the blocked sparsity level
//      should match the random sparsity.
//     4.1. Return the matching block config if found.
//     4.2. If no matching block config is found, encode the weight with random
//          sparsity, and add Densify() op to fall back to dense execution.
struct DenseToSparse : public PassWrapper<DenseToSparse, FunctionPass> {
  // Walks every op implementing SparseOpInterface and rewrites qualifying
  // constant weights; defined below.
  void runOnFunction() override;

  StringRef getArgument() const final {
    // This is the argument used to refer to the pass in
    // the textual format (on the commandline for example).
    return "tfl-dense-to-sparse";
  }
  StringRef getDescription() const final {
    // This is a brief description of the pass.
    return "Convert dense tensor to sparse format.";
  }
};

// Rewrites every sparse-capable weight in the function: replaces qualifying
// ConstOp/QConstOp weights with SparseConstOp/SparseQConstOp and, when the
// weight cannot be encoded in an op-supported block config, inserts a
// Densify() op so execution falls back to dense kernels.
void DenseToSparse::runOnFunction() {
  FuncOp func = getFunction();
  OpBuilder builder(func);

  func.walk([&](SparseOpInterface sparse_op) {
    // Each sparse-capable op reports which of its operands may be sparse.
    const auto& sparse_operands = sparse_op.GetSparseOperands();
    std::vector<std::vector<int>> supported_block_size;
    for (int operand : sparse_operands) {
      auto* op = sparse_op.getOperation();
      auto value = op->getOperand(operand);

      auto* inst = value.getDefiningOp();
      if (!inst) {
        // Block argument — no defining op to rewrite.
        continue;
      }

      // There could be a Dequantize op after the weight tensor in cases like
      // fp16 post-training quantization. We need to get the weight from the
      // input of the Dequantize op.
      if (isa<DequantizeOp>(inst)) {
        op = inst;
        value = inst->getOperand(0);
        inst = value.getDefiningOp();
        if (!inst) {
          continue;
        }
        operand = 0;
      }

      ShapedType type;
      if (isa<ConstOp>(inst)) {
        supported_block_size = sparse_op.GetFloatBlockSize();
        type = dyn_cast<ConstOp>(inst).getType().cast<ShapedType>();
      } else if (isa<QConstOp>(inst)) {
        supported_block_size = sparse_op.GetQuantizedBlockSize();
        type = dyn_cast<QConstOp>(inst).getType().cast<ShapedType>();
      } else {
        continue;
      }

      InspectResult result = InspectWeight(inst, supported_block_size);
      if (!result.can_compress) {
        continue;
      }

      // The weight is not block sparse. Encode with random sparsity.
      if (result.selected_block_size.empty()) {
        result.selected_block_size = std::vector<int>(type.getRank(), 1);
      }

      builder.setInsertionPoint(op);
      SparsityParameterAttr s_param;
      if (auto cst = dyn_cast<ConstOp>(inst)) {
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        if (type.getElementType().isF32()) {
          // f32 weight: encode the float buffer directly.
          std::vector<float> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto val : attr.getValues<float>())
            dense_data.push_back(val);
          std::vector<float> compressed_data =
              BuildSparsityParameterAttribute<float>(result.selected_block_size,
                                                     dense_data.data(), inst,
                                                     &builder, &s_param);
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              builder.getF32Type());
          auto new_value = DenseElementsAttr::get<float>(compressed_data_type,
                                                         compressed_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        } else if (type.getElementType().isF16()) {
          // f16 weight: APFloat -> Eigen::half for the encoder, then back to
          // APFloat to build the result attribute.
          std::vector<Eigen::half> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto& val : attr.getValues<APFloat>())
            dense_data.push_back(APFloatToEigenHalf(val));
          std::vector<Eigen::half> compressed_data =
              BuildSparsityParameterAttribute<Eigen::half>(
                  result.selected_block_size, dense_data.data(), inst, &builder,
                  &s_param);
          std::vector<APFloat> apfloat_data;
          apfloat_data.reserve(type.getNumElements());
          for (const auto& val : compressed_data)
            apfloat_data.push_back(EigenHalfToAPFloat(val));
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              type.getElementType());
          auto new_value =
              DenseElementsAttr::get(compressed_data_type, apfloat_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        }
      } else if (auto cst = dyn_cast<QConstOp>(inst)) {
        // Quantized weight: encode the int8 storage values.
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        std::vector<int8_t> dense_data;
        dense_data.reserve(type.getNumElements());
        for (const auto& val : attr.getValues<int8_t>())
          dense_data.push_back(val);
        std::vector<int8_t> compressed_data =
            BuildSparsityParameterAttribute<int8_t>(result.selected_block_size,
                                                    dense_data.data(), inst,
                                                    &builder, &s_param);
        auto compressed_data_type = RankedTensorType::get(
            {static_cast<int64_t>(compressed_data.size())},
            builder.getIntegerType(8, true));
        auto new_value = DenseElementsAttr::get<int8_t>(compressed_data_type,
                                                        compressed_data);
        auto s_qconst = builder.create<SparseQConstOp>(
            op->getLoc(), cst.qtypeAttr(), cst.value(), s_param, new_value);
        value.replaceAllUsesWith(s_qconst.getResult());
        cst.erase();
      }

      if (result.needs_densify) {
        const auto value = op->getOperand(operand);
        auto densify =
            builder.create<DensifyOp>(op->getLoc(), value.getType(), value);
        // replaceAllUsesWith also rewires the densify op's own operand, so
        // restore it afterwards to read the (sparse) constant.
        value.replaceAllUsesWith(densify);
        densify.setOperand(value);
      }
    }
  });
}

429 }  // namespace
430 
431 // Creates an instance of the TensorFlow Lite dialect DenseToSparse pass.
CreateDenseToSparsePass()432 std::unique_ptr<OperationPass<FuncOp>> CreateDenseToSparsePass() {
433   return absl::make_unique<DenseToSparse>();
434 }
435 
436 static PassRegistration<DenseToSparse> pass;
437 
438 }  // namespace TFL
439 }  // namespace mlir
440