/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_

#include <algorithm>
#include <bitset>
#include <optional>
#include <sstream>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/globals.h"
#include "base/iteration_range.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "base/transform_iterator.h"
#include "nodes.h"

namespace art HIDDEN {

// Helper for transforming blocks to block_ids.
class BlockToBlockIdTransformer {
 public:
  BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default;
  BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default;
  BlockToBlockIdTransformer() {}

  inline uint32_t operator()(const HBasicBlock* b) const {
    return b->GetBlockId();
  }
};

// Helper for transforming block ids to blocks.
class BlockIdToBlockTransformer {
 public:
  BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default;
  BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default;
  explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {}

  inline const HGraph* GetGraph() const {
    return graph_;
  }

  inline HBasicBlock* GetBlock(uint32_t id) const {
    DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod();
    HBasicBlock* blk = graph_->GetBlocks()[id];
    DCHECK(blk != nullptr);
    return blk;
  }

  inline HBasicBlock* operator()(uint32_t id) const {
    return GetBlock(id);
  }

 private:
  const HGraph* const graph_;
};

class BlockIdFilterThunk {
 public:
  explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {}
  BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default;
  BlockIdFilterThunk(const BlockIdFilterThunk&) = default;

  bool operator()(const HBasicBlock* b) const {
    return inner_.IsBitSet(b->GetBlockId());
  }

 private:
  const BitVector& inner_;
};

// A representation of a particular section of the graph. The graph is split
// into an excluded and an included area and is used to track escapes.
//
// This object is a view of the graph and is not updated as the graph is
// changed.
//
// This is implemented by removing various escape points from the subgraph
// using the 'RemoveBlock' function. Once all required blocks are removed one
// will 'Finalize' the subgraph. This will extend the removed area to include:
// (1) Any block which inevitably leads to (i.e. is post-dominated by) a
//     removed block
// (2) Any block which lies between two removed blocks
//
// This allows us to create a set of 'ExcludedCohorts', which are the
// well-connected subsets of the graph made up of removed blocks. These cohorts
// have a set of entry and exit blocks which act as the boundary of the cohort.
// Since we removed the blocks between two excluded blocks, it is impossible
// for any cohort-exit block to reach any cohort-entry block. This means we can
// use the boundary between the cohort and the rest of the graph to insert
// materialization blocks for partial LSE.
//
// TODO We really should expand this to take into account where the object
// allocation actually takes place. Currently we always act as though it were
// allocated in the entry block. This is a massively simplifying assumption but
// means we can't partially remove objects that are repeatedly allocated in a
// loop.
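//
// A rough usage sketch of the workflow described above (hypothetical caller code; the
// 'escape_blocks' collection and the allocator setup are illustrative placeholders, not part of
// this header):
//
//   ScopedArenaAllocator allocator(graph->GetArenaStack());
//   ExecutionSubgraph subgraph(graph, &allocator);
//   for (const HBasicBlock* escape : escape_blocks) {  // Blocks where the object escapes.
//     subgraph.RemoveBlock(escape);
//   }
//   subgraph.Finalize();
//   if (subgraph.IsValid()) {
//     for (const ExecutionSubgraph::ExcludedCohort& cohort : subgraph.GetExcludedCohorts()) {
//       // cohort.EntryBlocks() and cohort.ExitBlocks() bound where materialization code for
//       // partial LSE would be inserted.
//     }
//   }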
class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> {
 public:
  using BitVecBlockRange =
      IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>;
  using FilteredBitVecBlockRange = IterationRange<
      FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>;

  // A set of connected blocks which have been removed from the
  // ExecutionSubgraph. See the above comment for an explanation.
  class ExcludedCohort : public ArenaObject<kArenaAllocLSA> {
   public:
    ExcludedCohort(ExcludedCohort&&) = default;
    ExcludedCohort(const ExcludedCohort&) = delete;
    explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph)
        : graph_(graph),
          entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
          exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
          blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {}

    ~ExcludedCohort() = default;

    // All blocks in the cohort.
    BitVecBlockRange Blocks() const {
      return BlockIterRange(blocks_);
    }

    // Blocks that have predecessors outside of the cohort. These blocks will
    // need to have PHIs/control-flow added to create the escaping value.
    BitVecBlockRange EntryBlocks() const {
      return BlockIterRange(entry_blocks_);
    }

    FilteredBitVecBlockRange EntryBlocksReversePostOrder() const {
      return Filter(MakeIterationRange(graph_->GetReversePostOrder()),
                    BlockIdFilterThunk(entry_blocks_));
    }

    bool IsEntryBlock(const HBasicBlock* blk) const {
      return entry_blocks_.IsBitSet(blk->GetBlockId());
    }

    // Blocks that have successors outside of the cohort. The successors of
    // these blocks will need to have PHIs to restore state.
    BitVecBlockRange ExitBlocks() const {
      return BlockIterRange(exit_blocks_);
    }

    bool operator==(const ExcludedCohort& other) const {
      return blocks_.Equal(&other.blocks_);
    }

    bool ContainsBlock(const HBasicBlock* blk) const {
      return blocks_.IsBitSet(blk->GetBlockId());
    }

    // Returns true if there is a path from 'blk' to any block in this cohort.
    // NB blocks contained within the cohort are not considered to be succeeded
    // by the cohort (i.e. this function will return false for them).
    bool SucceedsBlock(const HBasicBlock* blk) const {
      if (ContainsBlock(blk)) {
        return false;
      }
      auto idxs = entry_blocks_.Indexes();
      return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool {
        return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry);
      });
    }

    // Returns true if there is a path from any block in this cohort to 'blk'.
    // NB blocks contained within the cohort are not considered to be preceded
    // by the cohort (i.e. this function will return false for them).
    bool PrecedesBlock(const HBasicBlock* blk) const {
      if (ContainsBlock(blk)) {
        return false;
      }
      auto idxs = exit_blocks_.Indexes();
      return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool {
        return blk->GetGraph()->PathBetween(exit, blk->GetBlockId());
      });
    }
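
    // Illustrative example (hypothetical block names): assume a diamond CFG
    // entry_blk -> {left_blk, right_blk} -> exit_blk where only left_blk was
    // removed. The single cohort is then {left_blk}, which is both its own
    // entry and its own exit block, and:
    //   SucceedsBlock(entry_blk) == true   // entry_blk can reach the cohort.
    //   PrecedesBlock(exit_blk)  == true   // The cohort can reach exit_blk.
    //   SucceedsBlock(exit_blk)  == false  // No path from exit_blk into the cohort.
    //   PrecedesBlock(entry_blk) == false  // No path from the cohort back to entry_blk.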

    void Dump(std::ostream& os) const;

   private:
    BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const {
      auto indexes = bv.Indexes();
      BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_));
      return res;
    }

    ExcludedCohort() = delete;

    HGraph* graph_;
    ArenaBitVector entry_blocks_;
    ArenaBitVector exit_blocks_;
    ArenaBitVector blocks_;

    friend class ExecutionSubgraph;
    friend class LoadStoreAnalysisTest;
  };

  // The number of successors we can track on a single block. Graphs which
  // contain a block with a branching factor greater than this will not be
  // analysed. This is used both to limit the memory usage of the analysis to
  // reasonable levels and to ensure that the analysis will complete in a
  // reasonable amount of time. It also simplifies the implementation somewhat
  // to have a constant branching factor.
  static constexpr uint32_t kMaxFilterableSuccessors = 8;

  // Instantiate a subgraph. The subgraph can be instantiated only if
  // partial-escape analysis is desired (e.g. not when being used for
  // instruction scheduling) and when the branching factor in the graph is not
  // too high. These conditions are determined once and passed down for
  // performance reasons.
  ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator);

  void Invalidate() {
    valid_ = false;
  }

  // A block is contained by the ExecutionSubgraph if it is reachable. This
  // means it has not been removed explicitly or via pruning/concavity removal.
  // The subgraph must be finalized before calling this function.
  // See RemoveConcavity and Prune for more information.
  bool ContainsBlock(const HBasicBlock* blk) const {
    DCHECK_IMPLIES(finalized_, !needs_prune_);
    if (!valid_) {
      return false;
    }
    return !unreachable_blocks_.IsBitSet(blk->GetBlockId());
  }

  // Mark the block as removed from the subgraph.
  void RemoveBlock(const HBasicBlock* to_remove);

  // Called when no more updates will be done to the subgraph. Calculates the
  // final subgraph.
  void Finalize() {
    Prune();
    RemoveConcavity();
    finalized_ = true;
  }

  BitVecBlockRange UnreachableBlocks() const {
    auto idxs = unreachable_blocks_.Indexes();
    return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_));
  }

  // Returns true if all allowed execution paths from start eventually reach
  // the graph's exit block (or diverge).
  bool IsValid() const {
    return valid_;
  }

  ArrayRef<const ExcludedCohort> GetExcludedCohorts() const {
    DCHECK_IMPLIES(valid_, !needs_prune_);
    if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) {
      return ArrayRef<const ExcludedCohort>();
    } else {
      return ArrayRef<const ExcludedCohort>(*excluded_list_);
    }
  }

  // Helper class to create the reachable blocks iterator.
  class ContainsFunctor {
   public:
    bool operator()(HBasicBlock* blk) const {
      return subgraph_->ContainsBlock(blk);
    }

   private:
    explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {}
    const ExecutionSubgraph* const subgraph_;
    friend class ExecutionSubgraph;
  };
  // Returns an iterator over reachable blocks (filtered as we go). This is
  // primarily for testing.
  IterationRange<
      FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>>
  ReachableBlocks() const {
    return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this));
  }

  static bool CanAnalyse(HGraph* graph) {
    // If there are any blocks with more than kMaxFilterableSuccessors we can't
    // analyse the graph. We avoid this case to prevent excessive memory and
    // time usage while allowing a simpler algorithm with a fixed-width
    // branching factor.
    return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) {
      return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors;
    });
  }

 private:
  std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const {
    DCHECK(valid_);
    return allowed_successors_[blk->GetBlockId()];
  }

  void LimitBlockSuccessors(const HBasicBlock* block,
                            std::bitset<kMaxFilterableSuccessors> allowed) {
    needs_prune_ = true;
    allowed_successors_[block->GetBlockId()] &= allowed;
  }

  // Remove nodes which both precede and follow any exclusions. This ensures we
  // don't need to deal with only conditionally materializing objects depending
  // on whether we already materialized them. Ensure that for all blocks A, B,
  // C: Unreachable(A) && Unreachable(C) && PathBetween(A, B) &&
  // PathBetween(B, C) implies Unreachable(B). This simplifies later transforms
  // since it ensures that no execution can leave and then re-enter any
  // exclusion.
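  //
  // For example (hypothetical blocks): if removed blocks A and C satisfy PathBetween(A, B) and
  // PathBetween(B, C) for some block B that still has a path avoiding every removed block (so
  // Prune() alone would keep it), RemoveConcavity() marks B unreachable as well, since a path
  // through B could leave the excluded region after A and re-enter it at C.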
  void RemoveConcavity();

  // Removes sink nodes. Sink nodes are nodes where there is no execution which
  // avoids all removed nodes.
  void Prune();

  void RecalculateExcludedCohort();

  HGraph* graph_;
  ScopedArenaAllocator* allocator_;
  // The map from block_id -> allowed-successors.
  // This is the canonical representation of this subgraph. If a bit in the
  // bitset is not set then the corresponding outgoing edge of that block is
  // not considered traversable.
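  // For example (illustrative encoding, assuming bit i corresponds to a block's i-th successor): a
  // block with three successors and an allowed-successor bitset of 0b101 can only be left through
  // its first and third outgoing edges; the second edge is treated as removed.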
  ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_;
  // Helper that holds which blocks we are able to reach. Only valid if
  // 'needs_prune_ == false'.
  ArenaBitVector unreachable_blocks_;
  // A list of the excluded-cohorts of this subgraph. This is only valid when
  // 'needs_prune_ == false'.
  std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_;
  // True if there is at least one known path from the start block to the end
  // of this graph. Used to short-circuit computation.
  bool valid_;
  // True if the subgraph has been modified and must be pruned before it is
  // consistent and can be queried again.
  bool needs_prune_;
  // True if no more modification of the subgraph is permitted.
  bool finalized_;

  friend class ExecutionSubgraphTest;
  friend class LoadStoreAnalysisTest;

  DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph);
};

std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex);

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_