1 //===------ DeLICM.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Undo the effect of Loop Invariant Code Motion (LICM) and
10 // GVN Partial Redundancy Elimination (PRE) on SCoP-level.
11 //
12 // Namely, remove register/scalar dependencies by mapping them back to array
13 // elements.
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "polly/DeLICM.h"
18 #include "polly/LinkAllPasses.h"
19 #include "polly/Options.h"
20 #include "polly/ScopInfo.h"
21 #include "polly/ScopPass.h"
22 #include "polly/Support/GICHelper.h"
23 #include "polly/Support/ISLOStream.h"
24 #include "polly/Support/ISLTools.h"
25 #include "polly/ZoneAlgo.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/InitializePasses.h"
28
29 #define DEBUG_TYPE "polly-delicm"
30
31 using namespace polly;
32 using namespace llvm;
33
34 namespace {
35
36 cl::opt<int>
37 DelicmMaxOps("polly-delicm-max-ops",
38 cl::desc("Maximum number of isl operations to invest for "
39 "lifetime analysis; 0=no limit"),
40 cl::init(1000000), cl::cat(PollyCategory));
41
42 cl::opt<bool> DelicmOverapproximateWrites(
43 "polly-delicm-overapproximate-writes",
44 cl::desc(
45 "Do more PHI writes than necessary in order to avoid partial accesses"),
46 cl::init(false), cl::Hidden, cl::cat(PollyCategory));
47
48 cl::opt<bool> DelicmPartialWrites("polly-delicm-partial-writes",
49 cl::desc("Allow partial writes"),
50 cl::init(true), cl::Hidden,
51 cl::cat(PollyCategory));
52
53 cl::opt<bool>
54 DelicmComputeKnown("polly-delicm-compute-known",
55 cl::desc("Compute known content of array elements"),
56 cl::init(true), cl::Hidden, cl::cat(PollyCategory));
57
58 STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs");
59 STATISTIC(DeLICMOutOfQuota,
60 "Analyses aborted because max_operations was reached");
61 STATISTIC(MappedValueScalars, "Number of mapped Value scalars");
62 STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars");
63 STATISTIC(TargetsMapped, "Number of stores used for at least one mapping");
64 STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized");
65
66 STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM");
67 STATISTIC(NumValueWritesInLoops,
68 "Number of scalar value writes nested in affine loops after DeLICM");
69 STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM");
70 STATISTIC(NumPHIWritesInLoops,
71 "Number of scalar phi writes nested in affine loops after DeLICM");
72 STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM");
73 STATISTIC(NumSingletonWritesInLoops,
74 "Number of singleton writes nested in affine loops after DeLICM");
75
computeReachingOverwrite(isl::union_map Schedule,isl::union_map Writes,bool InclPrevWrite,bool InclOverwrite)76 isl::union_map computeReachingOverwrite(isl::union_map Schedule,
77 isl::union_map Writes,
78 bool InclPrevWrite,
79 bool InclOverwrite) {
80 return computeReachingWrite(Schedule, Writes, true, InclPrevWrite,
81 InclOverwrite);
82 }
83
84 /// Compute the next overwrite for a scalar.
85 ///
86 /// @param Schedule { DomainWrite[] -> Scatter[] }
87 /// Schedule of (at least) all writes. Instances not in @p
88 /// Writes are ignored.
89 /// @param Writes { DomainWrite[] }
90 /// The element instances that write to the scalar.
91 /// @param InclPrevWrite Whether to extend the timepoints to include
92 /// the timepoint where the previous write happens.
93 /// @param InclOverwrite Whether the reaching overwrite includes the timepoint
94 /// of the overwrite itself.
95 ///
96 /// @return { Scatter[] -> DomainDef[] }
computeScalarReachingOverwrite(isl::union_map Schedule,isl::union_set Writes,bool InclPrevWrite,bool InclOverwrite)97 isl::union_map computeScalarReachingOverwrite(isl::union_map Schedule,
98 isl::union_set Writes,
99 bool InclPrevWrite,
100 bool InclOverwrite) {
101
102 // { DomainWrite[] }
103 auto WritesMap = isl::union_map::from_domain(Writes);
104
105 // { [Element[] -> Scatter[]] -> DomainWrite[] }
106 auto Result = computeReachingOverwrite(
107 std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite);
108
109 return Result.domain_factor_range();
110 }
111
112 /// Overload of computeScalarReachingOverwrite, with only one writing statement.
113 /// Consequently, the result consists of only one map space.
114 ///
115 /// @param Schedule { DomainWrite[] -> Scatter[] }
116 /// @param Writes { DomainWrite[] }
117 /// @param InclPrevWrite Include the previous write to result.
118 /// @param InclOverwrite Include the overwrite to the result.
119 ///
120 /// @return { Scatter[] -> DomainWrite[] }
computeScalarReachingOverwrite(isl::union_map Schedule,isl::set Writes,bool InclPrevWrite,bool InclOverwrite)121 isl::map computeScalarReachingOverwrite(isl::union_map Schedule,
122 isl::set Writes, bool InclPrevWrite,
123 bool InclOverwrite) {
124 isl::space ScatterSpace = getScatterSpace(Schedule);
125 isl::space DomSpace = Writes.get_space();
126
127 isl::union_map ReachOverwrite = computeScalarReachingOverwrite(
128 Schedule, isl::union_set(Writes), InclPrevWrite, InclOverwrite);
129
130 isl::space ResultSpace = ScatterSpace.map_from_domain_and_range(DomSpace);
131 return singleton(std::move(ReachOverwrite), ResultSpace);
132 }
133
134 /// Try to find a 'natural' extension of a mapped to elements outside its
135 /// domain.
136 ///
137 /// @param Relevant The map with mapping that may not be modified.
138 /// @param Universe The domain to which @p Relevant needs to be extended.
139 ///
140 /// @return A map with that associates the domain elements of @p Relevant to the
141 /// same elements and in addition the elements of @p Universe to some
142 /// undefined elements. The function prefers to return simple maps.
expandMapping(isl::union_map Relevant,isl::union_set Universe)143 isl::union_map expandMapping(isl::union_map Relevant, isl::union_set Universe) {
144 Relevant = Relevant.coalesce();
145 isl::union_set RelevantDomain = Relevant.domain();
146 isl::union_map Simplified = Relevant.gist_domain(RelevantDomain);
147 Simplified = Simplified.coalesce();
148 return Simplified.intersect_domain(Universe);
149 }
150
151 /// Represent the knowledge of the contents of any array elements in any zone or
152 /// the knowledge we would add when mapping a scalar to an array element.
153 ///
154 /// Every array element at every zone unit has one of two states:
155 ///
156 /// - Unused: Not occupied by any value so a transformation can change it to
157 /// other values.
158 ///
159 /// - Occupied: The element contains a value that is still needed.
160 ///
161 /// The union of Unused and Unknown zones forms the universe, the set of all
162 /// elements at every timepoint. The universe can easily be derived from the
163 /// array elements that are accessed someway. Arrays that are never accessed
164 /// also never play a role in any computation and can hence be ignored. With a
165 /// given universe, only one of the sets needs to stored implicitly. Computing
166 /// the complement is also an expensive operation, hence this class has been
167 /// designed that only one of sets is needed while the other is assumed to be
168 /// implicit. It can still be given, but is mostly ignored.
169 ///
170 /// There are two use cases for the Knowledge class:
171 ///
172 /// 1) To represent the knowledge of the current state of ScopInfo. The unused
173 /// state means that an element is currently unused: there is no read of it
174 /// before the next overwrite. Also called 'Existing'.
175 ///
176 /// 2) To represent the requirements for mapping a scalar to array elements. The
177 /// unused state means that there is no change/requirement. Also called
178 /// 'Proposed'.
179 ///
180 /// In addition to these states at unit zones, Knowledge needs to know when
181 /// values are written. This is because written values may have no lifetime (one
182 /// reason is that the value is never read). Such writes would therefore never
183 /// conflict, but overwrite values that might still be required. Another source
184 /// of problems are multiple writes to the same element at the same timepoint,
185 /// because their order is undefined.
186 class Knowledge {
187 private:
188 /// { [Element[] -> Zone[]] }
189 /// Set of array elements and when they are alive.
190 /// Can contain a nullptr; in this case the set is implicitly defined as the
191 /// complement of #Unused.
192 ///
193 /// The set of alive array elements is represented as zone, as the set of live
194 /// values can differ depending on how the elements are interpreted.
195 /// Assuming a value X is written at timestep [0] and read at timestep [1]
196 /// without being used at any later point, then the value is alive in the
197 /// interval ]0,1[. This interval cannot be represented by an integer set, as
198 /// it does not contain any integer point. Zones allow us to represent this
199 /// interval and can be converted to sets of timepoints when needed (e.g., in
200 /// isConflicting when comparing to the write sets).
201 /// @see convertZoneToTimepoints and this file's comment for more details.
202 isl::union_set Occupied;
203
204 /// { [Element[] -> Zone[]] }
205 /// Set of array elements when they are not alive, i.e. their memory can be
206 /// used for other purposed. Can contain a nullptr; in this case the set is
207 /// implicitly defined as the complement of #Occupied.
208 isl::union_set Unused;
209
210 /// { [Element[] -> Zone[]] -> ValInst[] }
211 /// Maps to the known content for each array element at any interval.
212 ///
213 /// Any element/interval can map to multiple known elements. This is due to
214 /// multiple llvm::Value referring to the same content. Examples are
215 ///
216 /// - A value stored and loaded again. The LoadInst represents the same value
217 /// as the StoreInst's value operand.
218 ///
219 /// - A PHINode is equal to any one of the incoming values. In case of
220 /// LCSSA-form, it is always equal to its single incoming value.
221 ///
222 /// Two Knowledges are considered not conflicting if at least one of the known
223 /// values match. Not known values are not stored as an unnamed tuple (as
224 /// #Written does), but maps to nothing.
225 ///
226 /// Known values are usually just defined for #Occupied elements. Knowing
227 /// #Unused contents has no advantage as it can be overwritten.
228 isl::union_map Known;
229
230 /// { [Element[] -> Scatter[]] -> ValInst[] }
231 /// The write actions currently in the scop or that would be added when
232 /// mapping a scalar. Maps to the value that is written.
233 ///
234 /// Written values that cannot be identified are represented by an unknown
235 /// ValInst[] (an unnamed tuple of 0 dimension). It conflicts with itself.
236 isl::union_map Written;
237
238 /// Check whether this Knowledge object is well-formed.
checkConsistency() const239 void checkConsistency() const {
240 #ifndef NDEBUG
241 // Default-initialized object
242 if (!Occupied && !Unused && !Known && !Written)
243 return;
244
245 assert(Occupied || Unused);
246 assert(Known);
247 assert(Written);
248
249 // If not all fields are defined, we cannot derived the universe.
250 if (!Occupied || !Unused)
251 return;
252
253 assert(Occupied.is_disjoint(Unused));
254 auto Universe = Occupied.unite(Unused);
255
256 assert(!Known.domain().is_subset(Universe).is_false());
257 assert(!Written.domain().is_subset(Universe).is_false());
258 #endif
259 }
260
261 public:
262 /// Initialize a nullptr-Knowledge. This is only provided for convenience; do
263 /// not use such an object.
Knowledge()264 Knowledge() {}
265
266 /// Create a new object with the given members.
Knowledge(isl::union_set Occupied,isl::union_set Unused,isl::union_map Known,isl::union_map Written)267 Knowledge(isl::union_set Occupied, isl::union_set Unused,
268 isl::union_map Known, isl::union_map Written)
269 : Occupied(std::move(Occupied)), Unused(std::move(Unused)),
270 Known(std::move(Known)), Written(std::move(Written)) {
271 checkConsistency();
272 }
273
274 /// Return whether this object was not default-constructed.
isUsable() const275 bool isUsable() const { return (Occupied || Unused) && Known && Written; }
276
277 /// Print the content of this object to @p OS.
print(llvm::raw_ostream & OS,unsigned Indent=0) const278 void print(llvm::raw_ostream &OS, unsigned Indent = 0) const {
279 if (isUsable()) {
280 if (Occupied)
281 OS.indent(Indent) << "Occupied: " << Occupied << "\n";
282 else
283 OS.indent(Indent) << "Occupied: <Everything else not in Unused>\n";
284 if (Unused)
285 OS.indent(Indent) << "Unused: " << Unused << "\n";
286 else
287 OS.indent(Indent) << "Unused: <Everything else not in Occupied>\n";
288 OS.indent(Indent) << "Known: " << Known << "\n";
289 OS.indent(Indent) << "Written : " << Written << '\n';
290 } else {
291 OS.indent(Indent) << "Invalid knowledge\n";
292 }
293 }
294
295 /// Combine two knowledges, this and @p That.
learnFrom(Knowledge That)296 void learnFrom(Knowledge That) {
297 assert(!isConflicting(*this, That));
298 assert(Unused && That.Occupied);
299 assert(
300 !That.Unused &&
301 "This function is only prepared to learn occupied elements from That");
302 assert(!Occupied && "This function does not implement "
303 "`this->Occupied = "
304 "this->Occupied.unite(That.Occupied);`");
305
306 Unused = Unused.subtract(That.Occupied);
307 Known = Known.unite(That.Known);
308 Written = Written.unite(That.Written);
309
310 checkConsistency();
311 }
312
313 /// Determine whether two Knowledges conflict with each other.
314 ///
315 /// In theory @p Existing and @p Proposed are symmetric, but the
316 /// implementation is constrained by the implicit interpretation. That is, @p
317 /// Existing must have #Unused defined (use case 1) and @p Proposed must have
318 /// #Occupied defined (use case 1).
319 ///
320 /// A conflict is defined as non-preserved semantics when they are merged. For
321 /// instance, when for the same array and zone they assume different
322 /// llvm::Values.
323 ///
324 /// @param Existing One of the knowledges with #Unused defined.
325 /// @param Proposed One of the knowledges with #Occupied defined.
326 /// @param OS Dump the conflict reason to this output stream; use
327 /// nullptr to not output anything.
328 /// @param Indent Indention for the conflict reason.
329 ///
330 /// @return True, iff the two knowledges are conflicting.
isConflicting(const Knowledge & Existing,const Knowledge & Proposed,llvm::raw_ostream * OS=nullptr,unsigned Indent=0)331 static bool isConflicting(const Knowledge &Existing,
332 const Knowledge &Proposed,
333 llvm::raw_ostream *OS = nullptr,
334 unsigned Indent = 0) {
335 assert(Existing.Unused);
336 assert(Proposed.Occupied);
337
338 #ifndef NDEBUG
339 if (Existing.Occupied && Proposed.Unused) {
340 auto ExistingUniverse = Existing.Occupied.unite(Existing.Unused);
341 auto ProposedUniverse = Proposed.Occupied.unite(Proposed.Unused);
342 assert(ExistingUniverse.is_equal(ProposedUniverse) &&
343 "Both inputs' Knowledges must be over the same universe");
344 }
345 #endif
346
347 // Do the Existing and Proposed lifetimes conflict?
348 //
349 // Lifetimes are described as the cross-product of array elements and zone
350 // intervals in which they are alive (the space { [Element[] -> Zone[]] }).
351 // In the following we call this "element/lifetime interval".
352 //
353 // In order to not conflict, one of the following conditions must apply for
354 // each element/lifetime interval:
355 //
356 // 1. If occupied in one of the knowledges, it is unused in the other.
357 //
358 // - or -
359 //
360 // 2. Both contain the same value.
361 //
362 // Instead of partitioning the element/lifetime intervals into a part that
363 // both Knowledges occupy (which requires an expensive subtraction) and for
364 // these to check whether they are known to be the same value, we check only
365 // the second condition and ensure that it also applies when then first
366 // condition is true. This is done by adding a wildcard value to
367 // Proposed.Known and Existing.Unused such that they match as a common known
368 // value. We use the "unknown ValInst" for this purpose. Every
369 // Existing.Unused may match with an unknown Proposed.Occupied because these
370 // never are in conflict with each other.
371 auto ProposedOccupiedAnyVal = makeUnknownForDomain(Proposed.Occupied);
372 auto ProposedValues = Proposed.Known.unite(ProposedOccupiedAnyVal);
373
374 auto ExistingUnusedAnyVal = makeUnknownForDomain(Existing.Unused);
375 auto ExistingValues = Existing.Known.unite(ExistingUnusedAnyVal);
376
377 auto MatchingVals = ExistingValues.intersect(ProposedValues);
378 auto Matches = MatchingVals.domain();
379
380 // Any Proposed.Occupied must either have a match between the known values
381 // of Existing and Occupied, or be in Existing.Unused. In the latter case,
382 // the previously added "AnyVal" will match each other.
383 if (!Proposed.Occupied.is_subset(Matches)) {
384 if (OS) {
385 auto Conflicting = Proposed.Occupied.subtract(Matches);
386 auto ExistingConflictingKnown =
387 Existing.Known.intersect_domain(Conflicting);
388 auto ProposedConflictingKnown =
389 Proposed.Known.intersect_domain(Conflicting);
390
391 OS->indent(Indent) << "Proposed lifetime conflicting with Existing's\n";
392 OS->indent(Indent) << "Conflicting occupied: " << Conflicting << "\n";
393 if (!ExistingConflictingKnown.is_empty())
394 OS->indent(Indent)
395 << "Existing Known: " << ExistingConflictingKnown << "\n";
396 if (!ProposedConflictingKnown.is_empty())
397 OS->indent(Indent)
398 << "Proposed Known: " << ProposedConflictingKnown << "\n";
399 }
400 return true;
401 }
402
403 // Do the writes in Existing conflict with occupied values in Proposed?
404 //
405 // In order to not conflict, it must either write to unused lifetime or
406 // write the same value. To check, we remove the writes that write into
407 // Proposed.Unused (they never conflict) and then see whether the written
408 // value is already in Proposed.Known. If there are multiple known values
409 // and a written value is known under different names, it is enough when one
410 // of the written values (assuming that they are the same value under
411 // different names, e.g. a PHINode and one of the incoming values) matches
412 // one of the known names.
413 //
414 // We convert here the set of lifetimes to actual timepoints. A lifetime is
415 // in conflict with a set of write timepoints, if either a live timepoint is
416 // clearly within the lifetime or if a write happens at the beginning of the
417 // lifetime (where it would conflict with the value that actually writes the
418 // value alive). There is no conflict at the end of a lifetime, as the alive
419 // value will always be read, before it is overwritten again. The last
420 // property holds in Polly for all scalar values and we expect all users of
421 // Knowledge to check this property also for accesses to MemoryKind::Array.
422 auto ProposedFixedDefs =
423 convertZoneToTimepoints(Proposed.Occupied, true, false);
424 auto ProposedFixedKnown =
425 convertZoneToTimepoints(Proposed.Known, isl::dim::in, true, false);
426
427 auto ExistingConflictingWrites =
428 Existing.Written.intersect_domain(ProposedFixedDefs);
429 auto ExistingConflictingWritesDomain = ExistingConflictingWrites.domain();
430
431 auto CommonWrittenVal =
432 ProposedFixedKnown.intersect(ExistingConflictingWrites);
433 auto CommonWrittenValDomain = CommonWrittenVal.domain();
434
435 if (!ExistingConflictingWritesDomain.is_subset(CommonWrittenValDomain)) {
436 if (OS) {
437 auto ExistingConflictingWritten =
438 ExistingConflictingWrites.subtract_domain(CommonWrittenValDomain);
439 auto ProposedConflictingKnown = ProposedFixedKnown.subtract_domain(
440 ExistingConflictingWritten.domain());
441
442 OS->indent(Indent)
443 << "Proposed a lifetime where there is an Existing write into it\n";
444 OS->indent(Indent) << "Existing conflicting writes: "
445 << ExistingConflictingWritten << "\n";
446 if (!ProposedConflictingKnown.is_empty())
447 OS->indent(Indent)
448 << "Proposed conflicting known: " << ProposedConflictingKnown
449 << "\n";
450 }
451 return true;
452 }
453
454 // Do the writes in Proposed conflict with occupied values in Existing?
455 auto ExistingAvailableDefs =
456 convertZoneToTimepoints(Existing.Unused, true, false);
457 auto ExistingKnownDefs =
458 convertZoneToTimepoints(Existing.Known, isl::dim::in, true, false);
459
460 auto ProposedWrittenDomain = Proposed.Written.domain();
461 auto KnownIdentical = ExistingKnownDefs.intersect(Proposed.Written);
462 auto IdenticalOrUnused =
463 ExistingAvailableDefs.unite(KnownIdentical.domain());
464 if (!ProposedWrittenDomain.is_subset(IdenticalOrUnused)) {
465 if (OS) {
466 auto Conflicting = ProposedWrittenDomain.subtract(IdenticalOrUnused);
467 auto ExistingConflictingKnown =
468 ExistingKnownDefs.intersect_domain(Conflicting);
469 auto ProposedConflictingWritten =
470 Proposed.Written.intersect_domain(Conflicting);
471
472 OS->indent(Indent) << "Proposed writes into range used by Existing\n";
473 OS->indent(Indent) << "Proposed conflicting writes: "
474 << ProposedConflictingWritten << "\n";
475 if (!ExistingConflictingKnown.is_empty())
476 OS->indent(Indent)
477 << "Existing conflicting known: " << ExistingConflictingKnown
478 << "\n";
479 }
480 return true;
481 }
482
483 // Does Proposed write at the same time as Existing already does (order of
484 // writes is undefined)? Writing the same value is permitted.
485 auto ExistingWrittenDomain = Existing.Written.domain();
486 auto BothWritten =
487 Existing.Written.domain().intersect(Proposed.Written.domain());
488 auto ExistingKnownWritten = filterKnownValInst(Existing.Written);
489 auto ProposedKnownWritten = filterKnownValInst(Proposed.Written);
490 auto CommonWritten =
491 ExistingKnownWritten.intersect(ProposedKnownWritten).domain();
492
493 if (!BothWritten.is_subset(CommonWritten)) {
494 if (OS) {
495 auto Conflicting = BothWritten.subtract(CommonWritten);
496 auto ExistingConflictingWritten =
497 Existing.Written.intersect_domain(Conflicting);
498 auto ProposedConflictingWritten =
499 Proposed.Written.intersect_domain(Conflicting);
500
501 OS->indent(Indent) << "Proposed writes at the same time as an already "
502 "Existing write\n";
503 OS->indent(Indent) << "Conflicting writes: " << Conflicting << "\n";
504 if (!ExistingConflictingWritten.is_empty())
505 OS->indent(Indent)
506 << "Exiting write: " << ExistingConflictingWritten << "\n";
507 if (!ProposedConflictingWritten.is_empty())
508 OS->indent(Indent)
509 << "Proposed write: " << ProposedConflictingWritten << "\n";
510 }
511 return true;
512 }
513
514 return false;
515 }
516 };
517
518 /// Implementation of the DeLICM/DePRE transformation.
519 class DeLICMImpl : public ZoneAlgorithm {
520 private:
521 /// Knowledge before any transformation took place.
522 Knowledge OriginalZone;
523
524 /// Current knowledge of the SCoP including all already applied
525 /// transformations.
526 Knowledge Zone;
527
528 /// Number of StoreInsts something can be mapped to.
529 int NumberOfCompatibleTargets = 0;
530
531 /// The number of StoreInsts to which at least one value or PHI has been
532 /// mapped to.
533 int NumberOfTargetsMapped = 0;
534
535 /// The number of llvm::Value mapped to some array element.
536 int NumberOfMappedValueScalars = 0;
537
538 /// The number of PHIs mapped to some array element.
539 int NumberOfMappedPHIScalars = 0;
540
541 /// Determine whether two knowledges are conflicting with each other.
542 ///
543 /// @see Knowledge::isConflicting
isConflicting(const Knowledge & Proposed)544 bool isConflicting(const Knowledge &Proposed) {
545 raw_ostream *OS = nullptr;
546 LLVM_DEBUG(OS = &llvm::dbgs());
547 return Knowledge::isConflicting(Zone, Proposed, OS, 4);
548 }
549
550 /// Determine whether @p SAI is a scalar that can be mapped to an array
551 /// element.
isMappable(const ScopArrayInfo * SAI)552 bool isMappable(const ScopArrayInfo *SAI) {
553 assert(SAI);
554
555 if (SAI->isValueKind()) {
556 auto *MA = S->getValueDef(SAI);
557 if (!MA) {
558 LLVM_DEBUG(
559 dbgs()
560 << " Reject because value is read-only within the scop\n");
561 return false;
562 }
563
564 // Mapping if value is used after scop is not supported. The code
565 // generator would need to reload the scalar after the scop, but it
566 // does not have the information to where it is mapped to. Only the
567 // MemoryAccesses have that information, not the ScopArrayInfo.
568 auto Inst = MA->getAccessInstruction();
569 for (auto User : Inst->users()) {
570 if (!isa<Instruction>(User))
571 return false;
572 auto UserInst = cast<Instruction>(User);
573
574 if (!S->contains(UserInst)) {
575 LLVM_DEBUG(dbgs() << " Reject because value is escaping\n");
576 return false;
577 }
578 }
579
580 return true;
581 }
582
583 if (SAI->isPHIKind()) {
584 auto *MA = S->getPHIRead(SAI);
585 assert(MA);
586
587 // Mapping of an incoming block from before the SCoP is not supported by
588 // the code generator.
589 auto PHI = cast<PHINode>(MA->getAccessInstruction());
590 for (auto Incoming : PHI->blocks()) {
591 if (!S->contains(Incoming)) {
592 LLVM_DEBUG(dbgs()
593 << " Reject because at least one incoming block is "
594 "not in the scop region\n");
595 return false;
596 }
597 }
598
599 return true;
600 }
601
602 LLVM_DEBUG(dbgs() << " Reject ExitPHI or other non-value\n");
603 return false;
604 }
605
606 /// Compute the uses of a MemoryKind::Value and its lifetime (from its
607 /// definition to the last use).
608 ///
609 /// @param SAI The ScopArrayInfo representing the value's storage.
610 ///
611 /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] }
612 /// First element is the set of uses for each definition.
613 /// The second is the lifetime of each definition.
614 std::tuple<isl::union_map, isl::map>
computeValueUses(const ScopArrayInfo * SAI)615 computeValueUses(const ScopArrayInfo *SAI) {
616 assert(SAI->isValueKind());
617
618 // { DomainRead[] }
619 auto Reads = makeEmptyUnionSet();
620
621 // Find all uses.
622 for (auto *MA : S->getValueUses(SAI))
623 Reads = Reads.add_set(getDomainFor(MA));
624
625 // { DomainRead[] -> Scatter[] }
626 auto ReadSchedule = getScatterFor(Reads);
627
628 auto *DefMA = S->getValueDef(SAI);
629 assert(DefMA);
630
631 // { DomainDef[] }
632 auto Writes = getDomainFor(DefMA);
633
634 // { DomainDef[] -> Scatter[] }
635 auto WriteScatter = getScatterFor(Writes);
636
637 // { Scatter[] -> DomainDef[] }
638 auto ReachDef = getScalarReachingDefinition(DefMA->getStatement());
639
640 // { [DomainDef[] -> Scatter[]] -> DomainUse[] }
641 auto Uses = isl::union_map(ReachDef.reverse().range_map())
642 .apply_range(ReadSchedule.reverse());
643
644 // { DomainDef[] -> Scatter[] }
645 auto UseScatter =
646 singleton(Uses.domain().unwrap(),
647 Writes.get_space().map_from_domain_and_range(ScatterSpace));
648
649 // { DomainDef[] -> Zone[] }
650 auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true);
651
652 // { DomainDef[] -> DomainRead[] }
653 auto DefUses = Uses.domain_factor_domain();
654
655 return std::make_pair(DefUses, Lifetime);
656 }
657
658 /// Try to map a MemoryKind::Value to a given array element.
659 ///
660 /// @param SAI Representation of the scalar's memory to map.
661 /// @param TargetElt { Scatter[] -> Element[] }
662 /// Suggestion where to map a scalar to when at a timepoint.
663 ///
664 /// @return true if the scalar was successfully mapped.
tryMapValue(const ScopArrayInfo * SAI,isl::map TargetElt)665 bool tryMapValue(const ScopArrayInfo *SAI, isl::map TargetElt) {
666 assert(SAI->isValueKind());
667
668 auto *DefMA = S->getValueDef(SAI);
669 assert(DefMA->isValueKind());
670 assert(DefMA->isMustWrite());
671 auto *V = DefMA->getAccessValue();
672 auto *DefInst = DefMA->getAccessInstruction();
673
674 // Stop if the scalar has already been mapped.
675 if (!DefMA->getLatestScopArrayInfo()->isValueKind())
676 return false;
677
678 // { DomainDef[] -> Scatter[] }
679 auto DefSched = getScatterFor(DefMA);
680
681 // Where each write is mapped to, according to the suggestion.
682 // { DomainDef[] -> Element[] }
683 auto DefTarget = TargetElt.apply_domain(DefSched.reverse());
684 simplify(DefTarget);
685 LLVM_DEBUG(dbgs() << " Def Mapping: " << DefTarget << '\n');
686
687 auto OrigDomain = getDomainFor(DefMA);
688 auto MappedDomain = DefTarget.domain();
689 if (!OrigDomain.is_subset(MappedDomain)) {
690 LLVM_DEBUG(
691 dbgs()
692 << " Reject because mapping does not encompass all instances\n");
693 return false;
694 }
695
696 // { DomainDef[] -> Zone[] }
697 isl::map Lifetime;
698
699 // { DomainDef[] -> DomainUse[] }
700 isl::union_map DefUses;
701
702 std::tie(DefUses, Lifetime) = computeValueUses(SAI);
703 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << '\n');
704
705 /// { [Element[] -> Zone[]] }
706 auto EltZone = Lifetime.apply_domain(DefTarget).wrap();
707 simplify(EltZone);
708
709 // When known knowledge is disabled, just return the unknown value. It will
710 // either get filtered out or conflict with itself.
711 // { DomainDef[] -> ValInst[] }
712 isl::map ValInst;
713 if (DelicmComputeKnown)
714 ValInst = makeValInst(V, DefMA->getStatement(),
715 LI->getLoopFor(DefInst->getParent()));
716 else
717 ValInst = makeUnknownForDomain(DefMA->getStatement());
718
719 // { DomainDef[] -> [Element[] -> Zone[]] }
720 auto EltKnownTranslator = DefTarget.range_product(Lifetime);
721
722 // { [Element[] -> Zone[]] -> ValInst[] }
723 auto EltKnown = ValInst.apply_domain(EltKnownTranslator);
724 simplify(EltKnown);
725
726 // { DomainDef[] -> [Element[] -> Scatter[]] }
727 auto WrittenTranslator = DefTarget.range_product(DefSched);
728
729 // { [Element[] -> Scatter[]] -> ValInst[] }
730 auto DefEltSched = ValInst.apply_domain(WrittenTranslator);
731 simplify(DefEltSched);
732
733 Knowledge Proposed(EltZone, nullptr, filterKnownValInst(EltKnown),
734 DefEltSched);
735 if (isConflicting(Proposed))
736 return false;
737
738 // { DomainUse[] -> Element[] }
739 auto UseTarget = DefUses.reverse().apply_range(DefTarget);
740
741 mapValue(SAI, std::move(DefTarget), std::move(UseTarget),
742 std::move(Lifetime), std::move(Proposed));
743 return true;
744 }
745
746 /// After a scalar has been mapped, update the global knowledge.
applyLifetime(Knowledge Proposed)747 void applyLifetime(Knowledge Proposed) {
748 Zone.learnFrom(std::move(Proposed));
749 }
750
751 /// Map a MemoryKind::Value scalar to an array element.
752 ///
753 /// Callers must have ensured that the mapping is valid and not conflicting.
754 ///
755 /// @param SAI The ScopArrayInfo representing the scalar's memory to
756 /// map.
757 /// @param DefTarget { DomainDef[] -> Element[] }
758 /// The array element to map the scalar to.
759 /// @param UseTarget { DomainUse[] -> Element[] }
760 /// The array elements the uses are mapped to.
761 /// @param Lifetime { DomainDef[] -> Zone[] }
762 /// The lifetime of each llvm::Value definition for
763 /// reporting.
764 /// @param Proposed Mapping constraints for reporting.
mapValue(const ScopArrayInfo * SAI,isl::map DefTarget,isl::union_map UseTarget,isl::map Lifetime,Knowledge Proposed)765 void mapValue(const ScopArrayInfo *SAI, isl::map DefTarget,
766 isl::union_map UseTarget, isl::map Lifetime,
767 Knowledge Proposed) {
768 // Redirect the read accesses.
769 for (auto *MA : S->getValueUses(SAI)) {
770 // { DomainUse[] }
771 auto Domain = getDomainFor(MA);
772
773 // { DomainUse[] -> Element[] }
774 auto NewAccRel = UseTarget.intersect_domain(Domain);
775 simplify(NewAccRel);
776
777 assert(isl_union_map_n_map(NewAccRel.get()) == 1);
778 MA->setNewAccessRelation(isl::map::from_union_map(NewAccRel));
779 }
780
781 auto *WA = S->getValueDef(SAI);
782 WA->setNewAccessRelation(DefTarget);
783 applyLifetime(Proposed);
784
785 MappedValueScalars++;
786 NumberOfMappedValueScalars += 1;
787 }
788
makeValInst(Value * Val,ScopStmt * UserStmt,Loop * Scope,bool IsCertain=true)789 isl::map makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope,
790 bool IsCertain = true) {
791 // When known knowledge is disabled, just return the unknown value. It will
792 // either get filtered out or conflict with itself.
793 if (!DelicmComputeKnown)
794 return makeUnknownForDomain(UserStmt);
795 return ZoneAlgorithm::makeValInst(Val, UserStmt, Scope, IsCertain);
796 }
797
798 /// Express the incoming values of a PHI for each incoming statement in an
799 /// isl::union_map.
800 ///
801 /// @param SAI The PHI scalar represented by a ScopArrayInfo.
802 ///
803 /// @return { PHIWriteDomain[] -> ValInst[] }
determinePHIWrittenValues(const ScopArrayInfo * SAI)804 isl::union_map determinePHIWrittenValues(const ScopArrayInfo *SAI) {
805 auto Result = makeEmptyUnionMap();
806
807 // Collect the incoming values.
808 for (auto *MA : S->getPHIIncomings(SAI)) {
809 // { DomainWrite[] -> ValInst[] }
810 isl::union_map ValInst;
811 auto *WriteStmt = MA->getStatement();
812
813 auto Incoming = MA->getIncoming();
814 assert(!Incoming.empty());
815 if (Incoming.size() == 1) {
816 ValInst = makeValInst(Incoming[0].second, WriteStmt,
817 LI->getLoopFor(Incoming[0].first));
818 } else {
819 // If the PHI is in a subregion's exit node it can have multiple
820 // incoming values (+ maybe another incoming edge from an unrelated
821 // block). We cannot directly represent it as a single llvm::Value.
822 // We currently model it as unknown value, but modeling as the PHIInst
823 // itself could be OK, too.
824 ValInst = makeUnknownForDomain(WriteStmt);
825 }
826
827 Result = Result.unite(ValInst);
828 }
829
830 assert(Result.is_single_valued() &&
831 "Cannot have multiple incoming values for same incoming statement");
832 return Result;
833 }
834
835 /// Try to map a MemoryKind::PHI scalar to a given array element.
836 ///
837 /// @param SAI Representation of the scalar's memory to map.
838 /// @param TargetElt { Scatter[] -> Element[] }
839 /// Suggestion where to map the scalar to when at a
840 /// timepoint.
841 ///
842 /// @return true if the PHI scalar has been mapped.
tryMapPHI(const ScopArrayInfo * SAI,isl::map TargetElt)843 bool tryMapPHI(const ScopArrayInfo *SAI, isl::map TargetElt) {
844 auto *PHIRead = S->getPHIRead(SAI);
845 assert(PHIRead->isPHIKind());
846 assert(PHIRead->isRead());
847
848 // Skip if already been mapped.
849 if (!PHIRead->getLatestScopArrayInfo()->isPHIKind())
850 return false;
851
852 // { DomainRead[] -> Scatter[] }
853 auto PHISched = getScatterFor(PHIRead);
854
855 // { DomainRead[] -> Element[] }
856 auto PHITarget = PHISched.apply_range(TargetElt);
857 simplify(PHITarget);
858 LLVM_DEBUG(dbgs() << " Mapping: " << PHITarget << '\n');
859
860 auto OrigDomain = getDomainFor(PHIRead);
861 auto MappedDomain = PHITarget.domain();
862 if (!OrigDomain.is_subset(MappedDomain)) {
863 LLVM_DEBUG(
864 dbgs()
865 << " Reject because mapping does not encompass all instances\n");
866 return false;
867 }
868
869 // { DomainRead[] -> DomainWrite[] }
870 auto PerPHIWrites = computePerPHI(SAI);
871
872 // { DomainWrite[] -> Element[] }
873 auto WritesTarget = PerPHIWrites.apply_domain(PHITarget).reverse();
874 simplify(WritesTarget);
875
876 // { DomainWrite[] }
877 auto UniverseWritesDom = isl::union_set::empty(ParamSpace);
878
879 for (auto *MA : S->getPHIIncomings(SAI))
880 UniverseWritesDom = UniverseWritesDom.add_set(getDomainFor(MA));
881
882 auto RelevantWritesTarget = WritesTarget;
883 if (DelicmOverapproximateWrites)
884 WritesTarget = expandMapping(WritesTarget, UniverseWritesDom);
885
886 auto ExpandedWritesDom = WritesTarget.domain();
887 if (!DelicmPartialWrites &&
888 !UniverseWritesDom.is_subset(ExpandedWritesDom)) {
889 LLVM_DEBUG(
890 dbgs() << " Reject because did not find PHI write mapping for "
891 "all instances\n");
892 if (DelicmOverapproximateWrites)
893 LLVM_DEBUG(dbgs() << " Relevant Mapping: "
894 << RelevantWritesTarget << '\n');
895 LLVM_DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget
896 << '\n');
897 LLVM_DEBUG(dbgs() << " Missing instances: "
898 << UniverseWritesDom.subtract(ExpandedWritesDom)
899 << '\n');
900 return false;
901 }
902
903 // { DomainRead[] -> Scatter[] }
904 isl::union_map PerPHIWriteScatterUmap = PerPHIWrites.apply_range(Schedule);
905 isl::map PerPHIWriteScatter =
906 singleton(PerPHIWriteScatterUmap, PHISched.get_space());
907
908 // { DomainRead[] -> Zone[] }
909 auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true);
910 simplify(Lifetime);
911 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n");
912
913 // { DomainWrite[] -> Zone[] }
914 auto WriteLifetime = isl::union_map(Lifetime).apply_domain(PerPHIWrites);
915
916 // { DomainWrite[] -> ValInst[] }
917 auto WrittenValue = determinePHIWrittenValues(SAI);
918
919 // { DomainWrite[] -> [Element[] -> Scatter[]] }
920 auto WrittenTranslator = WritesTarget.range_product(Schedule);
921
922 // { [Element[] -> Scatter[]] -> ValInst[] }
923 auto Written = WrittenValue.apply_domain(WrittenTranslator);
924 simplify(Written);
925
926 // { DomainWrite[] -> [Element[] -> Zone[]] }
927 auto LifetimeTranslator = WritesTarget.range_product(WriteLifetime);
928
929 // { DomainWrite[] -> ValInst[] }
930 auto WrittenKnownValue = filterKnownValInst(WrittenValue);
931
932 // { [Element[] -> Zone[]] -> ValInst[] }
933 auto EltLifetimeInst = WrittenKnownValue.apply_domain(LifetimeTranslator);
934 simplify(EltLifetimeInst);
935
936 // { [Element[] -> Zone[] }
937 auto Occupied = LifetimeTranslator.range();
938 simplify(Occupied);
939
940 Knowledge Proposed(Occupied, nullptr, EltLifetimeInst, Written);
941 if (isConflicting(Proposed))
942 return false;
943
944 mapPHI(SAI, std::move(PHITarget), std::move(WritesTarget),
945 std::move(Lifetime), std::move(Proposed));
946 return true;
947 }
948
949 /// Map a MemoryKind::PHI scalar to an array element.
950 ///
951 /// Callers must have ensured that the mapping is valid and not conflicting
952 /// with the common knowledge.
953 ///
954 /// @param SAI The ScopArrayInfo representing the scalar's memory to
955 /// map.
956 /// @param ReadTarget { DomainRead[] -> Element[] }
957 /// The array element to map the scalar to.
958 /// @param WriteTarget { DomainWrite[] -> Element[] }
959 /// New access target for each PHI incoming write.
960 /// @param Lifetime { DomainRead[] -> Zone[] }
961 /// The lifetime of each PHI for reporting.
962 /// @param Proposed Mapping constraints for reporting.
mapPHI(const ScopArrayInfo * SAI,isl::map ReadTarget,isl::union_map WriteTarget,isl::map Lifetime,Knowledge Proposed)963 void mapPHI(const ScopArrayInfo *SAI, isl::map ReadTarget,
964 isl::union_map WriteTarget, isl::map Lifetime,
965 Knowledge Proposed) {
966 // { Element[] }
967 isl::space ElementSpace = ReadTarget.get_space().range();
968
969 // Redirect the PHI incoming writes.
970 for (auto *MA : S->getPHIIncomings(SAI)) {
971 // { DomainWrite[] }
972 auto Domain = getDomainFor(MA);
973
974 // { DomainWrite[] -> Element[] }
975 auto NewAccRel = WriteTarget.intersect_domain(Domain);
976 simplify(NewAccRel);
977
978 isl::space NewAccRelSpace =
979 Domain.get_space().map_from_domain_and_range(ElementSpace);
980 isl::map NewAccRelMap = singleton(NewAccRel, NewAccRelSpace);
981 MA->setNewAccessRelation(NewAccRelMap);
982 }
983
984 // Redirect the PHI read.
985 auto *PHIRead = S->getPHIRead(SAI);
986 PHIRead->setNewAccessRelation(ReadTarget);
987 applyLifetime(Proposed);
988
989 MappedPHIScalars++;
990 NumberOfMappedPHIScalars++;
991 }
992
993 /// Search and map scalars to memory overwritten by @p TargetStoreMA.
994 ///
995 /// Start trying to map scalars that are used in the same statement as the
996 /// store. For every successful mapping, try to also map scalars of the
997 /// statements where those are written. Repeat, until no more mapping
998 /// opportunity is found.
999 ///
1000 /// There is currently no preference in which order scalars are tried.
1001 /// Ideally, we would direct it towards a load instruction of the same array
1002 /// element.
collapseScalarsToStore(MemoryAccess * TargetStoreMA)1003 bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) {
1004 assert(TargetStoreMA->isLatestArrayKind());
1005 assert(TargetStoreMA->isMustWrite());
1006
1007 auto TargetStmt = TargetStoreMA->getStatement();
1008
1009 // { DomTarget[] }
1010 auto TargetDom = getDomainFor(TargetStmt);
1011
1012 // { DomTarget[] -> Element[] }
1013 auto TargetAccRel = getAccessRelationFor(TargetStoreMA);
1014
1015 // { Zone[] -> DomTarget[] }
1016 // For each point in time, find the next target store instance.
1017 auto Target =
1018 computeScalarReachingOverwrite(Schedule, TargetDom, false, true);
1019
1020 // { Zone[] -> Element[] }
1021 // Use the target store's write location as a suggestion to map scalars to.
1022 auto EltTarget = Target.apply_range(TargetAccRel);
1023 simplify(EltTarget);
1024 LLVM_DEBUG(dbgs() << " Target mapping is " << EltTarget << '\n');
1025
1026 // Stack of elements not yet processed.
1027 SmallVector<MemoryAccess *, 16> Worklist;
1028
1029 // Set of scalars already tested.
1030 SmallPtrSet<const ScopArrayInfo *, 16> Closed;
1031
1032 // Lambda to add all scalar reads to the work list.
1033 auto ProcessAllIncoming = [&](ScopStmt *Stmt) {
1034 for (auto *MA : *Stmt) {
1035 if (!MA->isLatestScalarKind())
1036 continue;
1037 if (!MA->isRead())
1038 continue;
1039
1040 Worklist.push_back(MA);
1041 }
1042 };
1043
1044 auto *WrittenVal = TargetStoreMA->getAccessInstruction()->getOperand(0);
1045 if (auto *WrittenValInputMA = TargetStmt->lookupInputAccessOf(WrittenVal))
1046 Worklist.push_back(WrittenValInputMA);
1047 else
1048 ProcessAllIncoming(TargetStmt);
1049
1050 auto AnyMapped = false;
1051 auto &DL = S->getRegion().getEntry()->getModule()->getDataLayout();
1052 auto StoreSize =
1053 DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType());
1054
1055 while (!Worklist.empty()) {
1056 auto *MA = Worklist.pop_back_val();
1057
1058 auto *SAI = MA->getScopArrayInfo();
1059 if (Closed.count(SAI))
1060 continue;
1061 Closed.insert(SAI);
1062 LLVM_DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI
1063 << ")\n");
1064
1065 // Skip non-mappable scalars.
1066 if (!isMappable(SAI))
1067 continue;
1068
1069 auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType());
1070 if (MASize > StoreSize) {
1071 LLVM_DEBUG(
1072 dbgs() << " Reject because storage size is insufficient\n");
1073 continue;
1074 }
1075
1076 // Try to map MemoryKind::Value scalars.
1077 if (SAI->isValueKind()) {
1078 if (!tryMapValue(SAI, EltTarget))
1079 continue;
1080
1081 auto *DefAcc = S->getValueDef(SAI);
1082 ProcessAllIncoming(DefAcc->getStatement());
1083
1084 AnyMapped = true;
1085 continue;
1086 }
1087
1088 // Try to map MemoryKind::PHI scalars.
1089 if (SAI->isPHIKind()) {
1090 if (!tryMapPHI(SAI, EltTarget))
1091 continue;
1092 // Add inputs of all incoming statements to the worklist. Prefer the
1093 // input accesses of the incoming blocks.
1094 for (auto *PHIWrite : S->getPHIIncomings(SAI)) {
1095 auto *PHIWriteStmt = PHIWrite->getStatement();
1096 bool FoundAny = false;
1097 for (auto Incoming : PHIWrite->getIncoming()) {
1098 auto *IncomingInputMA =
1099 PHIWriteStmt->lookupInputAccessOf(Incoming.second);
1100 if (!IncomingInputMA)
1101 continue;
1102
1103 Worklist.push_back(IncomingInputMA);
1104 FoundAny = true;
1105 }
1106
1107 if (!FoundAny)
1108 ProcessAllIncoming(PHIWrite->getStatement());
1109 }
1110
1111 AnyMapped = true;
1112 continue;
1113 }
1114 }
1115
1116 if (AnyMapped) {
1117 TargetsMapped++;
1118 NumberOfTargetsMapped++;
1119 }
1120 return AnyMapped;
1121 }
1122
1123 /// Compute when an array element is unused.
1124 ///
1125 /// @return { [Element[] -> Zone[]] }
computeLifetime() const1126 isl::union_set computeLifetime() const {
1127 // { Element[] -> Zone[] }
1128 auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads,
1129 false, false, true);
1130
1131 auto Result = ArrayUnused.wrap();
1132
1133 simplify(Result);
1134 return Result;
1135 }
1136
1137 /// Determine when an array element is written to, and which value instance is
1138 /// written.
1139 ///
1140 /// @return { [Element[] -> Scatter[]] -> ValInst[] }
computeWritten() const1141 isl::union_map computeWritten() const {
1142 // { [Element[] -> Scatter[]] -> ValInst[] }
1143 auto EltWritten = applyDomainRange(AllWriteValInst, Schedule);
1144
1145 simplify(EltWritten);
1146 return EltWritten;
1147 }
1148
1149 /// Determine whether an access touches at most one element.
1150 ///
1151 /// The accessed element could be a scalar or accessing an array with constant
1152 /// subscript, such that all instances access only that element.
1153 ///
1154 /// @param MA The access to test.
1155 ///
1156 /// @return True, if zero or one elements are accessed; False if at least two
1157 /// different elements are accessed.
isScalarAccess(MemoryAccess * MA)1158 bool isScalarAccess(MemoryAccess *MA) {
1159 auto Map = getAccessRelationFor(MA);
1160 auto Set = Map.range();
1161 return Set.is_singleton();
1162 }
1163
1164 /// Print mapping statistics to @p OS.
printStatistics(llvm::raw_ostream & OS,int Indent=0) const1165 void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const {
1166 OS.indent(Indent) << "Statistics {\n";
1167 OS.indent(Indent + 4) << "Compatible overwrites: "
1168 << NumberOfCompatibleTargets << "\n";
1169 OS.indent(Indent + 4) << "Overwrites mapped to: " << NumberOfTargetsMapped
1170 << '\n';
1171 OS.indent(Indent + 4) << "Value scalars mapped: "
1172 << NumberOfMappedValueScalars << '\n';
1173 OS.indent(Indent + 4) << "PHI scalars mapped: "
1174 << NumberOfMappedPHIScalars << '\n';
1175 OS.indent(Indent) << "}\n";
1176 }
1177
1178 /// Return whether at least one transformation been applied.
isModified() const1179 bool isModified() const { return NumberOfTargetsMapped > 0; }
1180
1181 public:
DeLICMImpl(Scop * S,LoopInfo * LI)1182 DeLICMImpl(Scop *S, LoopInfo *LI) : ZoneAlgorithm("polly-delicm", S, LI) {}
1183
1184 /// Calculate the lifetime (definition to last use) of every array element.
1185 ///
1186 /// @return True if the computed lifetimes (#Zone) is usable.
computeZone()1187 bool computeZone() {
1188 // Check that nothing strange occurs.
1189 collectCompatibleElts();
1190
1191 isl::union_set EltUnused;
1192 isl::union_map EltKnown, EltWritten;
1193
1194 {
1195 IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps);
1196
1197 computeCommon();
1198
1199 EltUnused = computeLifetime();
1200 EltKnown = computeKnown(true, false);
1201 EltWritten = computeWritten();
1202 }
1203 DeLICMAnalyzed++;
1204
1205 if (!EltUnused || !EltKnown || !EltWritten) {
1206 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota &&
1207 "The only reason that these things have not been computed should "
1208 "be if the max-operations limit hit");
1209 DeLICMOutOfQuota++;
1210 LLVM_DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n");
1211 DebugLoc Begin, End;
1212 getDebugLocations(getBBPairForRegion(&S->getRegion()), Begin, End);
1213 OptimizationRemarkAnalysis R(DEBUG_TYPE, "OutOfQuota", Begin,
1214 S->getEntry());
1215 R << "maximal number of operations exceeded during zone analysis";
1216 S->getFunction().getContext().diagnose(R);
1217 return false;
1218 }
1219
1220 Zone = OriginalZone = Knowledge(nullptr, EltUnused, EltKnown, EltWritten);
1221 LLVM_DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4));
1222
1223 assert(Zone.isUsable() && OriginalZone.isUsable());
1224 return true;
1225 }
1226
1227 /// Try to map as many scalars to unused array elements as possible.
1228 ///
1229 /// Multiple scalars might be mappable to intersecting unused array element
1230 /// zones, but we can only chose one. This is a greedy algorithm, therefore
1231 /// the first processed element claims it.
greedyCollapse()1232 void greedyCollapse() {
1233 bool Modified = false;
1234
1235 for (auto &Stmt : *S) {
1236 for (auto *MA : Stmt) {
1237 if (!MA->isLatestArrayKind())
1238 continue;
1239 if (!MA->isWrite())
1240 continue;
1241
1242 if (MA->isMayWrite()) {
1243 LLVM_DEBUG(dbgs() << "Access " << MA
1244 << " pruned because it is a MAY_WRITE\n");
1245 OptimizationRemarkMissed R(DEBUG_TYPE, "TargetMayWrite",
1246 MA->getAccessInstruction());
1247 R << "Skipped possible mapping target because it is not an "
1248 "unconditional overwrite";
1249 S->getFunction().getContext().diagnose(R);
1250 continue;
1251 }
1252
1253 if (Stmt.getNumIterators() == 0) {
1254 LLVM_DEBUG(dbgs() << "Access " << MA
1255 << " pruned because it is not in a loop\n");
1256 OptimizationRemarkMissed R(DEBUG_TYPE, "WriteNotInLoop",
1257 MA->getAccessInstruction());
1258 R << "skipped possible mapping target because it is not in a loop";
1259 S->getFunction().getContext().diagnose(R);
1260 continue;
1261 }
1262
1263 if (isScalarAccess(MA)) {
1264 LLVM_DEBUG(dbgs()
1265 << "Access " << MA
1266 << " pruned because it writes only a single element\n");
1267 OptimizationRemarkMissed R(DEBUG_TYPE, "ScalarWrite",
1268 MA->getAccessInstruction());
1269 R << "skipped possible mapping target because the memory location "
1270 "written to does not depend on its outer loop";
1271 S->getFunction().getContext().diagnose(R);
1272 continue;
1273 }
1274
1275 if (!isa<StoreInst>(MA->getAccessInstruction())) {
1276 LLVM_DEBUG(dbgs() << "Access " << MA
1277 << " pruned because it is not a StoreInst\n");
1278 OptimizationRemarkMissed R(DEBUG_TYPE, "NotAStore",
1279 MA->getAccessInstruction());
1280 R << "skipped possible mapping target because non-store instructions "
1281 "are not supported";
1282 S->getFunction().getContext().diagnose(R);
1283 continue;
1284 }
1285
1286 // Check for more than one element acces per statement instance.
1287 // Currently we expect write accesses to be functional, eg. disallow
1288 //
1289 // { Stmt[0] -> [i] : 0 <= i < 2 }
1290 //
1291 // This may occur when some accesses to the element write/read only
1292 // parts of the element, eg. a single byte. Polly then divides each
1293 // element into subelements of the smallest access length, normal access
1294 // then touch multiple of such subelements. It is very common when the
1295 // array is accesses with memset, memcpy or memmove which take i8*
1296 // arguments.
1297 isl::union_map AccRel = MA->getLatestAccessRelation();
1298 if (!AccRel.is_single_valued().is_true()) {
1299 LLVM_DEBUG(dbgs() << "Access " << MA
1300 << " is incompatible because it writes multiple "
1301 "elements per instance\n");
1302 OptimizationRemarkMissed R(DEBUG_TYPE, "NonFunctionalAccRel",
1303 MA->getAccessInstruction());
1304 R << "skipped possible mapping target because it writes more than "
1305 "one element";
1306 S->getFunction().getContext().diagnose(R);
1307 continue;
1308 }
1309
1310 isl::union_set TouchedElts = AccRel.range();
1311 if (!TouchedElts.is_subset(CompatibleElts)) {
1312 LLVM_DEBUG(
1313 dbgs()
1314 << "Access " << MA
1315 << " is incompatible because it touches incompatible elements\n");
1316 OptimizationRemarkMissed R(DEBUG_TYPE, "IncompatibleElts",
1317 MA->getAccessInstruction());
1318 R << "skipped possible mapping target because a target location "
1319 "cannot be reliably analyzed";
1320 S->getFunction().getContext().diagnose(R);
1321 continue;
1322 }
1323
1324 assert(isCompatibleAccess(MA));
1325 NumberOfCompatibleTargets++;
1326 LLVM_DEBUG(dbgs() << "Analyzing target access " << MA << "\n");
1327 if (collapseScalarsToStore(MA))
1328 Modified = true;
1329 }
1330 }
1331
1332 if (Modified)
1333 DeLICMScopsModified++;
1334 }
1335
1336 /// Dump the internal information about a performed DeLICM to @p OS.
print(llvm::raw_ostream & OS,int Indent=0)1337 void print(llvm::raw_ostream &OS, int Indent = 0) {
1338 if (!Zone.isUsable()) {
1339 OS.indent(Indent) << "Zone not computed\n";
1340 return;
1341 }
1342
1343 printStatistics(OS, Indent);
1344 if (!isModified()) {
1345 OS.indent(Indent) << "No modification has been made\n";
1346 return;
1347 }
1348 printAccesses(OS, Indent);
1349 }
1350 };
1351
1352 class DeLICM : public ScopPass {
1353 private:
1354 DeLICM(const DeLICM &) = delete;
1355 const DeLICM &operator=(const DeLICM &) = delete;
1356
1357 /// The pass implementation, also holding per-scop data.
1358 std::unique_ptr<DeLICMImpl> Impl;
1359
collapseToUnused(Scop & S)1360 void collapseToUnused(Scop &S) {
1361 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1362 Impl = std::make_unique<DeLICMImpl>(&S, &LI);
1363
1364 if (!Impl->computeZone()) {
1365 LLVM_DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n");
1366 return;
1367 }
1368
1369 LLVM_DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n");
1370 Impl->greedyCollapse();
1371
1372 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
1373 LLVM_DEBUG(dbgs() << S);
1374 }
1375
1376 public:
1377 static char ID;
DeLICM()1378 explicit DeLICM() : ScopPass(ID) {}
1379
getAnalysisUsage(AnalysisUsage & AU) const1380 virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
1381 AU.addRequiredTransitive<ScopInfoRegionPass>();
1382 AU.addRequired<LoopInfoWrapperPass>();
1383 AU.setPreservesAll();
1384 }
1385
runOnScop(Scop & S)1386 virtual bool runOnScop(Scop &S) override {
1387 // Free resources for previous scop's computation, if not yet done.
1388 releaseMemory();
1389
1390 collapseToUnused(S);
1391
1392 auto ScopStats = S.getStatistics();
1393 NumValueWrites += ScopStats.NumValueWrites;
1394 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
1395 NumPHIWrites += ScopStats.NumPHIWrites;
1396 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
1397 NumSingletonWrites += ScopStats.NumSingletonWrites;
1398 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
1399
1400 return false;
1401 }
1402
printScop(raw_ostream & OS,Scop & S) const1403 virtual void printScop(raw_ostream &OS, Scop &S) const override {
1404 if (!Impl)
1405 return;
1406 assert(Impl->getScop() == &S);
1407
1408 OS << "DeLICM result:\n";
1409 Impl->print(OS);
1410 }
1411
releaseMemory()1412 virtual void releaseMemory() override { Impl.reset(); }
1413 };
1414
1415 char DeLICM::ID;
1416 } // anonymous namespace
1417
createDeLICMPass()1418 Pass *polly::createDeLICMPass() { return new DeLICM(); }
1419
1420 INITIALIZE_PASS_BEGIN(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false,
1421 false)
INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass)1422 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass)
1423 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1424 INITIALIZE_PASS_END(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false,
1425 false)
1426
1427 bool polly::isConflicting(
1428 isl::union_set ExistingOccupied, isl::union_set ExistingUnused,
1429 isl::union_map ExistingKnown, isl::union_map ExistingWrites,
1430 isl::union_set ProposedOccupied, isl::union_set ProposedUnused,
1431 isl::union_map ProposedKnown, isl::union_map ProposedWrites,
1432 llvm::raw_ostream *OS, unsigned Indent) {
1433 Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused),
1434 std::move(ExistingKnown), std::move(ExistingWrites));
1435 Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused),
1436 std::move(ProposedKnown), std::move(ProposedWrites));
1437
1438 return Knowledge::isConflicting(Existing, Proposed, OS, Indent);
1439 }
1440