/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_

#include <algorithm>
#include <bitset>
#include <optional>
#include <sstream>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/globals.h"
#include "base/iteration_range.h"
#include "base/mutex.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "base/transform_iterator.h"
#include "nodes.h"

namespace art {

// Helper for transforming blocks to block_ids.
class BlockToBlockIdTransformer {
 public:
  BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default;
  BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default;
  BlockToBlockIdTransformer() {}

  inline uint32_t operator()(const HBasicBlock* b) const {
    return b->GetBlockId();
  }
};

// Helper for transforming block ids to blocks.
class BlockIdToBlockTransformer {
 public:
  BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default;
  BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default;
  explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {}

  inline const HGraph* GetGraph() const {
    return graph_;
  }

  inline HBasicBlock* GetBlock(uint32_t id) const {
    DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod();
    HBasicBlock* blk = graph_->GetBlocks()[id];
    DCHECK(blk != nullptr);
    return blk;
  }

  inline HBasicBlock* operator()(uint32_t id) const {
    return GetBlock(id);
  }

 private:
  const HGraph* const graph_;
};

// Helper for filtering blocks to those whose ids are set in a bit vector.
class BlockIdFilterThunk {
 public:
  explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {}
  BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default;
  BlockIdFilterThunk(const BlockIdFilterThunk&) = default;

  bool operator()(const HBasicBlock* b) const {
    return inner_.IsBitSet(b->GetBlockId());
  }

 private:
  const BitVector& inner_;
};
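
// The three functors above are meant to be composed with the generic iterator
// adapters from base/ (TransformIterator, FilterIterator). As a rough
// illustrative sketch (the 'graph' and 'ids' variables here are hypothetical,
// not part of this header), mapping a bit vector of block ids back to blocks
// and walking only the marked blocks of a graph looks like:
//
//   const ArenaBitVector& ids = ...;  // bits indexed by block id
//   for (HBasicBlock* blk :
//        MakeTransformRange(ids.Indexes(), BlockIdToBlockTransformer(graph))) {
//     // 'blk' is graph->GetBlocks()[id] for each set bit 'id'.
//   }
//   for (HBasicBlock* blk : Filter(MakeIterationRange(graph->GetReversePostOrder()),
//                                  BlockIdFilterThunk(ids))) {
//     // Reverse post order restricted to the blocks whose ids are set in 'ids'.
//   }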

// A representation of a particular section of the graph. The graph is split
// into an excluded and included area and is used to track escapes.
//
// This object is a view of the graph and is not updated as the graph is
// changed.
//
// This is implemented by removing various escape points from the subgraph
// using the 'RemoveBlock' function. Once all required blocks are removed one
// will 'Finalize' the subgraph. This will extend the removed area to include:
// (1) Any block which inevitably leads to (is post-dominated by) a removed
//     block
// (2) Any block which lies between two removed blocks
//
// This allows us to create a set of 'ExcludedCohorts' which are the
// well-connected subsets of the graph made up of removed blocks. These cohorts
// have a set of entry and exit blocks which act as the boundary of the cohort.
// Since we removed blocks between two excluded blocks it is impossible for any
// cohort-exit block to reach any cohort-entry block. This means we can use the
// boundary between the cohort and the rest of the graph to insert
// materialization blocks for partial LSE.
//
// TODO We really should expand this to take into account where the object
// allocation takes place directly. Currently we always act as though it were
// allocated in the entry block. This is a massively simplifying assumption but
// means we can't partially remove objects that are repeatedly allocated in a
// loop.
class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> {
 public:
  using BitVecBlockRange =
      IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>;
  using FilteredBitVecBlockRange = IterationRange<
      FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>;

  // A set of connected blocks which have been removed from the
  // ExecutionSubgraph. See the class comment above for an explanation.
  class ExcludedCohort : public ArenaObject<kArenaAllocLSA> {
   public:
    ExcludedCohort(ExcludedCohort&&) = default;
    ExcludedCohort(const ExcludedCohort&) = delete;
    explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph)
        : graph_(graph),
          entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
          exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
          blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {}

    ~ExcludedCohort() = default;

    // All blocks in the cohort.
    BitVecBlockRange Blocks() const {
      return BlockIterRange(blocks_);
    }

    // Blocks that have predecessors outside of the cohort. These blocks will
    // need to have PHIs/control-flow added to create the escaping value.
    BitVecBlockRange EntryBlocks() const {
      return BlockIterRange(entry_blocks_);
    }

    FilteredBitVecBlockRange EntryBlocksReversePostOrder() const {
      return Filter(MakeIterationRange(graph_->GetReversePostOrder()),
                    BlockIdFilterThunk(entry_blocks_));
    }

    bool IsEntryBlock(const HBasicBlock* blk) const {
      return entry_blocks_.IsBitSet(blk->GetBlockId());
    }

    // Blocks that have successors outside of the cohort. The successors of
    // these blocks will need to have PHIs to restore state.
    BitVecBlockRange ExitBlocks() const {
      return BlockIterRange(exit_blocks_);
    }

    bool operator==(const ExcludedCohort& other) const {
      return blocks_.Equal(&other.blocks_);
    }

    bool ContainsBlock(const HBasicBlock* blk) const {
      return blocks_.IsBitSet(blk->GetBlockId());
    }
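
    // Small illustrative example (a hypothetical diamond CFG, not anything in
    // this file): suppose 'cond' branches to 'left' and 'right', which both
    // join at 'merge', and only 'left' was removed. The cohort is then {left},
    // and 'left' is simultaneously an entry block (its predecessor 'cond' lies
    // outside the cohort) and an exit block (its successor 'merge' lies
    // outside the cohort). Per the comments above, the escaping value is
    // created on the way into 'left' and 'merge' needs PHIs to restore state.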

    // Returns true if there is a path from 'blk' to any block in this cohort.
    // NB blocks contained within the cohort are not considered to be succeeded
    // by the cohort (i.e. this function will return false).
    bool SucceedsBlock(const HBasicBlock* blk) const {
      if (ContainsBlock(blk)) {
        return false;
      }
      auto idxs = entry_blocks_.Indexes();
      return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool {
        return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry);
      });
    }

    // Returns true if there is a path from any block in this cohort to 'blk'.
    // NB blocks contained within the cohort are not considered to be preceded
    // by the cohort (i.e. this function will return false).
    bool PrecedesBlock(const HBasicBlock* blk) const {
      if (ContainsBlock(blk)) {
        return false;
      }
      auto idxs = exit_blocks_.Indexes();
      return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool {
        return blk->GetGraph()->PathBetween(exit, blk->GetBlockId());
      });
    }

    void Dump(std::ostream& os) const;

   private:
    BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const {
      auto indexes = bv.Indexes();
      BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_));
      return res;
    }

    ExcludedCohort() = delete;

    HGraph* graph_;
    ArenaBitVector entry_blocks_;
    ArenaBitVector exit_blocks_;
    ArenaBitVector blocks_;

    friend class ExecutionSubgraph;
    friend class LoadStoreAnalysisTest;
  };

  // The number of successors we can track on a single block. Graphs which
  // contain a block with a branching factor greater than this will not be
  // analysed. This is used both to limit the memory usage of the analysis to
  // reasonable levels and to ensure that the analysis will complete in a
  // reasonable amount of time. It also simplifies the implementation somewhat
  // to have a constant branching factor.
  static constexpr uint32_t kMaxFilterableSuccessors = 8;

  // Instantiate a subgraph. The subgraph can be instantiated only if
  // partial-escape analysis is desired (e.g. not when being used for
  // instruction scheduling) and when the branching factor in the graph is not
  // too high. These conditions are determined once and passed down for
  // performance reasons.
  ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator);

  void Invalidate() {
    valid_ = false;
  }

  // A block is contained by the ExecutionSubgraph if it is reachable. This
  // means it has not been removed explicitly or via pruning/concavity removal.
  // Finalization is needed before calling this function.
  // See RemoveConcavity and Prune for more information.
  bool ContainsBlock(const HBasicBlock* blk) const {
    DCHECK(!finalized_ || !needs_prune_) << "finalized: " << finalized_;
    if (!valid_) {
      return false;
    }
    return !unreachable_blocks_.IsBitSet(blk->GetBlockId());
  }

  // Mark the block as removed from the subgraph.
  void RemoveBlock(const HBasicBlock* to_remove);
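
  // A minimal usage sketch (the caller shown here is hypothetical; the real
  // driver is the load-store analysis). Escape points are removed one by one,
  // the subgraph is finalized, and then the result is queried:
  //
  //   ExecutionSubgraph subgraph(graph, &allocator);
  //   for (const HBasicBlock* escape_block : escape_blocks) {
  //     subgraph.RemoveBlock(escape_block);
  //   }
  //   subgraph.Finalize();
  //   if (subgraph.IsValid()) {
  //     for (const ExcludedCohort& cohort : subgraph.GetExcludedCohorts()) {
  //       // The entry/exit blocks of 'cohort' bound the region that partial
  //       // LSE has to handle.
  //     }
  //   }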

  // Called when no more updates will be done to the subgraph. Calculates the
  // final subgraph.
  void Finalize() {
    Prune();
    RemoveConcavity();
    finalized_ = true;
  }

  BitVecBlockRange UnreachableBlocks() const {
    auto idxs = unreachable_blocks_.Indexes();
    return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_));
  }

  // Returns true if all allowed execution paths from start eventually reach the
  // graph's exit block (or diverge).
  bool IsValid() const {
    return valid_;
  }

  ArrayRef<const ExcludedCohort> GetExcludedCohorts() const {
    DCHECK(!valid_ || !needs_prune_);
    if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) {
      return ArrayRef<const ExcludedCohort>();
    } else {
      return ArrayRef<const ExcludedCohort>(*excluded_list_);
    }
  }

  // Helper class used to create the reachable-blocks iterator.
  class ContainsFunctor {
   public:
    bool operator()(HBasicBlock* blk) const {
      return subgraph_->ContainsBlock(blk);
    }

   private:
    explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {}
    const ExecutionSubgraph* const subgraph_;
    friend class ExecutionSubgraph;
  };

  // Returns an iterator over reachable blocks (filtered as we go). This is primarily for testing.
  IterationRange<
      FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>>
  ReachableBlocks() const {
    return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this));
  }

  static bool CanAnalyse(HGraph* graph) {
    // If there are any blocks with more than kMaxFilterableSuccessors we can't
    // analyse the graph. We avoid this case to prevent excessive memory and
    // time usage while allowing a simpler algorithm with a fixed-width
    // branching factor.
    return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) {
      return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors;
    });
  }

 private:
  std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const {
    DCHECK(valid_);
    return allowed_successors_[blk->GetBlockId()];
  }

  void LimitBlockSuccessors(const HBasicBlock* block,
                            std::bitset<kMaxFilterableSuccessors> allowed) {
    needs_prune_ = true;
    allowed_successors_[block->GetBlockId()] &= allowed;
  }

  // Remove nodes which both precede and follow any exclusions. This ensures we
  // don't need to deal with only conditionally materializing objects depending
  // on whether we already materialized them. Ensures that for all blocks
  // A, B, C: Unreachable(A) && Unreachable(C) && PathBetween(A, B) &&
  // PathBetween(B, C) implies Unreachable(B). This simplifies later transforms
  // since it ensures that no execution can leave and then re-enter any
  // exclusion.
  void RemoveConcavity();

  // Removes sink nodes. Sink nodes are blocks from which there is no execution
  // path that avoids all removed blocks.
  void Prune();

  void RecalculateExcludedCohort();
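
  // Illustrative note (hypothetical blocks, not named anywhere in this file):
  // if blocks A and C were removed and some block B lies on a path
  // A -> B -> C, Prune() alone may keep B (B can still have a path to the exit
  // that avoids every removed block), but RemoveConcavity() removes B as well,
  // so no allowed execution can leave an exclusion and later re-enter one.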

  HGraph* graph_;
  ScopedArenaAllocator* allocator_;
  // The map from block_id -> allowed-successors.
  // This is the canonical representation of this subgraph. If a bit in the
  // bitset is not set then the corresponding outgoing edge of that block is
  // not considered traversable.
  ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_;
  // Helper that tracks which blocks are unreachable. Only valid if
  // 'needs_prune_ == false'.
  ArenaBitVector unreachable_blocks_;
  // A list of the excluded-cohorts of this subgraph. This is only valid when
  // 'needs_prune_ == false'.
  std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_;
  // Bool to hold if there is at least one known path from the start block to
  // the end in this graph. Used to short-circuit computation.
  bool valid_;
  // True if the subgraph has been modified since the last prune and must be
  // pruned again before it can be queried.
  bool needs_prune_;
  // True if no more modification of the subgraph is permitted.
  bool finalized_;

  friend class ExecutionSubgraphTest;
  friend class LoadStoreAnalysisTest;

  DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph);
};

std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex);

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_