1 //===- LoopFusionUtils.h - Loop fusion utilities ----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header file defines prototypes for various loop fusion utility 10 // methods: these are not passes by themselves but are used either by passes, 11 // optimization sequences, or in turn by other transformation utilities. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H 16 #define MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H 17 18 #include "mlir/IR/Value.h" 19 #include "mlir/Support/LLVM.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/SmallVector.h" 22 23 namespace mlir { 24 class AffineForOp; 25 struct ComputationSliceState; 26 class Operation; 27 28 // TODO: Extend this module to include utility functions for querying fusion 29 // cost/storage reduction, and for performing the loop fusion transformation. 30 31 struct FusionResult { 32 enum ResultEnum { 33 Success, 34 FailPrecondition, // Failed precondition for fusion. (e.g. same block). 35 FailBlockDependence, // Fusion would violate another dependence in block. 36 FailFusionDependence, // Fusion would reverse dependences between loops. 37 FailComputationSlice, // Unable to compute src loop computation slice. 38 } value; FusionResultFusionResult39 FusionResult(ResultEnum v) : value(v) {} 40 }; 41 42 /// Describes the fusion strategy to be used in the Affine loop fusion 43 /// utilities. Currently, it is used to specialized the loop fusion utilities 44 /// with the assumptions made in the AffineLoopFusion pass for producer-consumer 45 /// and sibling fusion, while sharing a single implementation. The latter 46 /// strategies are also limited to scenarios where a single memref is involved 47 /// in the producer-consume or sibling relationship between the candidate 48 /// loops. We use 'memref' to keep track of such a memref. 49 // TODO: Remove 'memref' when we support more generic scenarios. 50 // TODO: Generalize utilities so that producer-consumer and sibling fusion 51 // strategies can be used without the assumptions made in the AffineLoopFusion 52 // pass. 53 struct FusionStrategy { 54 enum StrategyEnum { 55 // Generic loop fusion: Arbitrary loops are considered for fusion. No 56 // assumptions about a specific fusion strategy from AffineLoopFusion pass 57 // are made. 58 // TODO: Generic fusion is not fully implemented by fusion utilities yet. 59 // It should only be used for testing. 60 Generic, 61 // Producer-consumer fusion: Only loops with a producer-consumer 62 // memref dependence are considered for fusion. Currently, assumptions from 63 // the producer-consumer fusion implementation in AffineLoopFusion pass are 64 // made. See pass for specific details. 65 ProducerConsumer, 66 // Sibling fusion: Only sibling loops with no producer-consumer memref 67 // dependences are considered for fusion. Memref reuse is taken into account 68 // for profitability. Currently, assumptions from the sibling fusion 69 // implementation in AffineLoopFusion pass are made. See pass for specific 70 // details. 71 Sibling 72 } strategy; 73 74 // Target memref for this fusion transformation. 75 Value memref; 76 FusionStrategyFusionStrategy77 FusionStrategy(StrategyEnum strategy, Value memref) 78 : strategy(strategy), memref(memref) {} 79 }; 80 81 /// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the 82 /// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult 83 /// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are 84 /// in the same block and dependences would not be violated). Otherwise 85 /// returns a FusionResult explaining why fusion is not feasible. 86 /// NOTE: This function is not feature complete and should only be used in 87 /// testing. 88 /// TODO: Update comments when this function is fully implemented. 89 FusionResult canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, 90 unsigned dstLoopDepth, 91 ComputationSliceState *srcSlice, 92 FusionStrategy fusionStrategy = { 93 FusionStrategy::Generic, Value()}); 94 95 /// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion point 96 /// and source slice loop bounds specified in 'srcSlice'. 97 void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, 98 const ComputationSliceState &srcSlice); 99 100 /// LoopNestStats aggregates various per-loop statistics (eg. loop trip count 101 /// and operation count) for a loop nest up until (and including) the innermost 102 /// loop body. 103 struct LoopNestStats { 104 /// Map from AffineForOp to immediate child AffineForOps in its loop body. 105 DenseMap<Operation *, SmallVector<AffineForOp, 2>> loopMap; 106 /// Map from AffineForOp to count of operations in its loop body. 107 DenseMap<Operation *, uint64_t> opCountMap; 108 /// Map from AffineForOp to its constant trip count. 109 DenseMap<Operation *, uint64_t> tripCountMap; 110 }; 111 112 /// Collect loop nest statistics (eg. loop trip count and operation count) 113 /// in 'stats' for loop nest rooted at 'forOp'. Returns true on success, 114 /// returns false otherwise. 115 // TODO: Consider moving this to LoopUtils. 116 bool getLoopNestStats(AffineForOp forOp, LoopNestStats *stats); 117 118 /// Computes the total cost of the loop nest rooted at 'forOp' using 'stats'. 119 /// Currently, the total cost is computed by counting the total operation 120 /// instance count (i.e. total number of operations in the loop body * loop 121 /// trip count) for the entire loop nest. 122 // TODO: Improve this cost model. 123 int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats); 124 125 /// Computes and returns in 'computeCost', the total compute cost of fusing the 126 /// 'slice' of the loop nest rooted at 'srcForOp' into 'dstForOp'. Currently, 127 /// the total cost is computed by counting the total operation instance count 128 /// (i.e. total number of operations in the loop body * loop trip count) for 129 /// the entire loop nest. 130 /// Returns true on success, failure otherwise (e.g. non-constant trip counts). 131 // TODO: Improve this cost model. 132 bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats, 133 AffineForOp dstForOp, LoopNestStats &dstStats, 134 const ComputationSliceState &slice, 135 int64_t *computeCost); 136 137 } // end namespace mlir 138 139 #endif // MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H 140