1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content. This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
44
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48
49 static cl::opt<bool>
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51 cl::desc("Do not treat all horizontal whitespace as equivalent"));
52
53 static cl::list<std::string> ImplicitCheckNot(
54 "implicit-check-not",
55 cl::desc("Add an implicit negative check with this pattern to every\n"
56 "positive check. This can be used to ensure that no instances of\n"
57 "this pattern occur which are not matched by a positive pattern"),
58 cl::value_desc("pattern"));
59
60 static cl::opt<bool> AllowEmptyInput(
61 "allow-empty", cl::init(false),
62 cl::desc("Allow the input file to be empty. This is useful when making\n"
63 "checks that some error message does not occur, for example."));
64
65 typedef cl::list<std::string>::const_iterator prefix_iterator;
66
67 //===----------------------------------------------------------------------===//
68 // Pattern Handling Code.
69 //===----------------------------------------------------------------------===//
70
71 namespace Check {
72 enum CheckType {
73 CheckNone = 0,
74 CheckPlain,
75 CheckNext,
76 CheckSame,
77 CheckNot,
78 CheckDAG,
79 CheckLabel,
80
81 /// MatchEOF - When set, this pattern only matches the end of file. This is
82 /// used for trailing CHECK-NOTs.
83 CheckEOF
84 };
85 }
86
87 class Pattern {
88 SMLoc PatternLoc;
89
90 Check::CheckType CheckTy;
91
92 /// FixedStr - If non-empty, this pattern is a fixed string match with the
93 /// specified fixed string.
94 StringRef FixedStr;
95
96 /// RegEx - If non-empty, this is a regex pattern.
97 std::string RegExStr;
98
99 /// \brief Contains the number of line this pattern is in.
100 unsigned LineNumber;
101
102 /// VariableUses - Entries in this vector map to uses of a variable in the
103 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
104 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
105 /// value of bar at offset 3.
106 std::vector<std::pair<StringRef, unsigned> > VariableUses;
107
108 /// VariableDefs - Maps definitions of variables to their parenthesized
109 /// capture numbers.
110 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
111 std::map<StringRef, unsigned> VariableDefs;
112
113 public:
114
Pattern(Check::CheckType Ty)115 Pattern(Check::CheckType Ty)
116 : CheckTy(Ty) { }
117
118 /// getLoc - Return the location in source code.
getLoc() const119 SMLoc getLoc() const { return PatternLoc; }
120
121 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
122 /// which prefix is being matched, SM provides the SourceMgr used for error
123 /// reports, and LineNumber is the line number in the input file from which
124 /// the pattern string was read. Returns true in case of an error, false
125 /// otherwise.
126 bool ParsePattern(StringRef PatternStr,
127 StringRef Prefix,
128 SourceMgr &SM,
129 unsigned LineNumber);
130
131 /// Match - Match the pattern string against the input buffer Buffer. This
132 /// returns the position that is matched or npos if there is no match. If
133 /// there is a match, the size of the matched string is returned in MatchLen.
134 ///
135 /// The VariableTable StringMap provides the current values of filecheck
136 /// variables and is updated if this match defines new values.
137 size_t Match(StringRef Buffer, size_t &MatchLen,
138 StringMap<StringRef> &VariableTable) const;
139
140 /// PrintFailureInfo - Print additional information about a failure to match
141 /// involving this pattern.
142 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
143 const StringMap<StringRef> &VariableTable) const;
144
hasVariable() const145 bool hasVariable() const { return !(VariableUses.empty() &&
146 VariableDefs.empty()); }
147
getCheckTy() const148 Check::CheckType getCheckTy() const { return CheckTy; }
149
150 private:
151 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
152 void AddBackrefToRegEx(unsigned BackrefNum);
153
154 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
155 /// matching this pattern at the start of \arg Buffer; a distance of zero
156 /// should correspond to a perfect match.
157 unsigned ComputeMatchDistance(StringRef Buffer,
158 const StringMap<StringRef> &VariableTable) const;
159
160 /// \brief Evaluates expression and stores the result to \p Value.
161 /// \return true on success. false when the expression has invalid syntax.
162 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
163
164 /// \brief Finds the closing sequence of a regex variable usage or
165 /// definition. Str has to point in the beginning of the definition
166 /// (right after the opening sequence).
167 /// \return offset of the closing sequence within Str, or npos if it was not
168 /// found.
169 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
170 };
171
172
ParsePattern(StringRef PatternStr,StringRef Prefix,SourceMgr & SM,unsigned LineNumber)173 bool Pattern::ParsePattern(StringRef PatternStr,
174 StringRef Prefix,
175 SourceMgr &SM,
176 unsigned LineNumber) {
177 this->LineNumber = LineNumber;
178 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
179
180 // Ignore trailing whitespace.
181 while (!PatternStr.empty() &&
182 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
183 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
184
185 // Check that there is something on the line.
186 if (PatternStr.empty()) {
187 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
188 "found empty check string with prefix '" +
189 Prefix + ":'");
190 return true;
191 }
192
193 // Check to see if this is a fixed string, or if it has regex pieces.
194 if (PatternStr.size() < 2 ||
195 (PatternStr.find("{{") == StringRef::npos &&
196 PatternStr.find("[[") == StringRef::npos)) {
197 FixedStr = PatternStr;
198 return false;
199 }
200
201 // Paren value #0 is for the fully matched string. Any new parenthesized
202 // values add from there.
203 unsigned CurParen = 1;
204
205 // Otherwise, there is at least one regex piece. Build up the regex pattern
206 // by escaping scary characters in fixed strings, building up one big regex.
207 while (!PatternStr.empty()) {
208 // RegEx matches.
209 if (PatternStr.startswith("{{")) {
210 // This is the start of a regex match. Scan for the }}.
211 size_t End = PatternStr.find("}}");
212 if (End == StringRef::npos) {
213 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
214 SourceMgr::DK_Error,
215 "found start of regex string with no end '}}'");
216 return true;
217 }
218
219 // Enclose {{}} patterns in parens just like [[]] even though we're not
220 // capturing the result for any purpose. This is required in case the
221 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
222 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
223 RegExStr += '(';
224 ++CurParen;
225
226 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
227 return true;
228 RegExStr += ')';
229
230 PatternStr = PatternStr.substr(End+2);
231 continue;
232 }
233
234 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
235 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
236 // second form is [[foo]] which is a reference to foo. The variable name
237 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
238 // it. This is to catch some common errors.
239 if (PatternStr.startswith("[[")) {
240 // Find the closing bracket pair ending the match. End is going to be an
241 // offset relative to the beginning of the match string.
242 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
243
244 if (End == StringRef::npos) {
245 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
246 SourceMgr::DK_Error,
247 "invalid named regex reference, no ]] found");
248 return true;
249 }
250
251 StringRef MatchStr = PatternStr.substr(2, End);
252 PatternStr = PatternStr.substr(End+4);
253
254 // Get the regex name (e.g. "foo").
255 size_t NameEnd = MatchStr.find(':');
256 StringRef Name = MatchStr.substr(0, NameEnd);
257
258 if (Name.empty()) {
259 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
260 "invalid name in named regex: empty name");
261 return true;
262 }
263
264 // Verify that the name/expression is well formed. FileCheck currently
265 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
266 // is relaxed, more strict check is performed in \c EvaluateExpression.
267 bool IsExpression = false;
268 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
269 if (i == 0 && Name[i] == '@') {
270 if (NameEnd != StringRef::npos) {
271 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
272 SourceMgr::DK_Error,
273 "invalid name in named regex definition");
274 return true;
275 }
276 IsExpression = true;
277 continue;
278 }
279 if (Name[i] != '_' && !isalnum(Name[i]) &&
280 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
281 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
282 SourceMgr::DK_Error, "invalid name in named regex");
283 return true;
284 }
285 }
286
287 // Name can't start with a digit.
288 if (isdigit(static_cast<unsigned char>(Name[0]))) {
289 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
290 "invalid name in named regex");
291 return true;
292 }
293
294 // Handle [[foo]].
295 if (NameEnd == StringRef::npos) {
296 // Handle variables that were defined earlier on the same line by
297 // emitting a backreference.
298 if (VariableDefs.find(Name) != VariableDefs.end()) {
299 unsigned VarParenNum = VariableDefs[Name];
300 if (VarParenNum < 1 || VarParenNum > 9) {
301 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
302 SourceMgr::DK_Error,
303 "Can't back-reference more than 9 variables");
304 return true;
305 }
306 AddBackrefToRegEx(VarParenNum);
307 } else {
308 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
309 }
310 continue;
311 }
312
313 // Handle [[foo:.*]].
314 VariableDefs[Name] = CurParen;
315 RegExStr += '(';
316 ++CurParen;
317
318 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
319 return true;
320
321 RegExStr += ')';
322 }
323
324 // Handle fixed string matches.
325 // Find the end, which is the start of the next regex.
326 size_t FixedMatchEnd = PatternStr.find("{{");
327 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
328 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
329 PatternStr = PatternStr.substr(FixedMatchEnd);
330 }
331
332 return false;
333 }
334
AddRegExToRegEx(StringRef RS,unsigned & CurParen,SourceMgr & SM)335 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
336 SourceMgr &SM) {
337 Regex R(RS);
338 std::string Error;
339 if (!R.isValid(Error)) {
340 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
341 "invalid regex: " + Error);
342 return true;
343 }
344
345 RegExStr += RS.str();
346 CurParen += R.getNumMatches();
347 return false;
348 }
349
AddBackrefToRegEx(unsigned BackrefNum)350 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
351 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
352 std::string Backref = std::string("\\") +
353 std::string(1, '0' + BackrefNum);
354 RegExStr += Backref;
355 }
356
EvaluateExpression(StringRef Expr,std::string & Value) const357 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
358 // The only supported expression is @LINE([\+-]\d+)?
359 if (!Expr.startswith("@LINE"))
360 return false;
361 Expr = Expr.substr(StringRef("@LINE").size());
362 int Offset = 0;
363 if (!Expr.empty()) {
364 if (Expr[0] == '+')
365 Expr = Expr.substr(1);
366 else if (Expr[0] != '-')
367 return false;
368 if (Expr.getAsInteger(10, Offset))
369 return false;
370 }
371 Value = llvm::itostr(LineNumber + Offset);
372 return true;
373 }
374
375 /// Match - Match the pattern string against the input buffer Buffer. This
376 /// returns the position that is matched or npos if there is no match. If
377 /// there is a match, the size of the matched string is returned in MatchLen.
Match(StringRef Buffer,size_t & MatchLen,StringMap<StringRef> & VariableTable) const378 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
379 StringMap<StringRef> &VariableTable) const {
380 // If this is the EOF pattern, match it immediately.
381 if (CheckTy == Check::CheckEOF) {
382 MatchLen = 0;
383 return Buffer.size();
384 }
385
386 // If this is a fixed string pattern, just match it now.
387 if (!FixedStr.empty()) {
388 MatchLen = FixedStr.size();
389 return Buffer.find(FixedStr);
390 }
391
392 // Regex match.
393
394 // If there are variable uses, we need to create a temporary string with the
395 // actual value.
396 StringRef RegExToMatch = RegExStr;
397 std::string TmpStr;
398 if (!VariableUses.empty()) {
399 TmpStr = RegExStr;
400
401 unsigned InsertOffset = 0;
402 for (const auto &VariableUse : VariableUses) {
403 std::string Value;
404
405 if (VariableUse.first[0] == '@') {
406 if (!EvaluateExpression(VariableUse.first, Value))
407 return StringRef::npos;
408 } else {
409 StringMap<StringRef>::iterator it =
410 VariableTable.find(VariableUse.first);
411 // If the variable is undefined, return an error.
412 if (it == VariableTable.end())
413 return StringRef::npos;
414
415 // Look up the value and escape it so that we can put it into the regex.
416 Value += Regex::escape(it->second);
417 }
418
419 // Plop it into the regex at the adjusted offset.
420 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
421 Value.begin(), Value.end());
422 InsertOffset += Value.size();
423 }
424
425 // Match the newly constructed regex.
426 RegExToMatch = TmpStr;
427 }
428
429
430 SmallVector<StringRef, 4> MatchInfo;
431 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
432 return StringRef::npos;
433
434 // Successful regex match.
435 assert(!MatchInfo.empty() && "Didn't get any match");
436 StringRef FullMatch = MatchInfo[0];
437
438 // If this defines any variables, remember their values.
439 for (const auto &VariableDef : VariableDefs) {
440 assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
441 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
442 }
443
444 MatchLen = FullMatch.size();
445 return FullMatch.data()-Buffer.data();
446 }
447
ComputeMatchDistance(StringRef Buffer,const StringMap<StringRef> & VariableTable) const448 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
449 const StringMap<StringRef> &VariableTable) const {
450 // Just compute the number of matching characters. For regular expressions, we
451 // just compare against the regex itself and hope for the best.
452 //
453 // FIXME: One easy improvement here is have the regex lib generate a single
454 // example regular expression which matches, and use that as the example
455 // string.
456 StringRef ExampleString(FixedStr);
457 if (ExampleString.empty())
458 ExampleString = RegExStr;
459
460 // Only compare up to the first line in the buffer, or the string size.
461 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
462 BufferPrefix = BufferPrefix.split('\n').first;
463 return BufferPrefix.edit_distance(ExampleString);
464 }
465
PrintFailureInfo(const SourceMgr & SM,StringRef Buffer,const StringMap<StringRef> & VariableTable) const466 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
467 const StringMap<StringRef> &VariableTable) const{
468 // If this was a regular expression using variables, print the current
469 // variable values.
470 if (!VariableUses.empty()) {
471 for (const auto &VariableUse : VariableUses) {
472 SmallString<256> Msg;
473 raw_svector_ostream OS(Msg);
474 StringRef Var = VariableUse.first;
475 if (Var[0] == '@') {
476 std::string Value;
477 if (EvaluateExpression(Var, Value)) {
478 OS << "with expression \"";
479 OS.write_escaped(Var) << "\" equal to \"";
480 OS.write_escaped(Value) << "\"";
481 } else {
482 OS << "uses incorrect expression \"";
483 OS.write_escaped(Var) << "\"";
484 }
485 } else {
486 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
487
488 // Check for undefined variable references.
489 if (it == VariableTable.end()) {
490 OS << "uses undefined variable \"";
491 OS.write_escaped(Var) << "\"";
492 } else {
493 OS << "with variable \"";
494 OS.write_escaped(Var) << "\" equal to \"";
495 OS.write_escaped(it->second) << "\"";
496 }
497 }
498
499 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
500 OS.str());
501 }
502 }
503
504 // Attempt to find the closest/best fuzzy match. Usually an error happens
505 // because some string in the output didn't exactly match. In these cases, we
506 // would like to show the user a best guess at what "should have" matched, to
507 // save them having to actually check the input manually.
508 size_t NumLinesForward = 0;
509 size_t Best = StringRef::npos;
510 double BestQuality = 0;
511
512 // Use an arbitrary 4k limit on how far we will search.
513 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
514 if (Buffer[i] == '\n')
515 ++NumLinesForward;
516
517 // Patterns have leading whitespace stripped, so skip whitespace when
518 // looking for something which looks like a pattern.
519 if (Buffer[i] == ' ' || Buffer[i] == '\t')
520 continue;
521
522 // Compute the "quality" of this match as an arbitrary combination of the
523 // match distance and the number of lines skipped to get to this match.
524 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
525 double Quality = Distance + (NumLinesForward / 100.);
526
527 if (Quality < BestQuality || Best == StringRef::npos) {
528 Best = i;
529 BestQuality = Quality;
530 }
531 }
532
533 // Print the "possible intended match here" line if we found something
534 // reasonable and not equal to what we showed in the "scanning from here"
535 // line.
536 if (Best && Best != StringRef::npos && BestQuality < 50) {
537 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
538 SourceMgr::DK_Note, "possible intended match here");
539
540 // FIXME: If we wanted to be really friendly we would show why the match
541 // failed, as it can be hard to spot simple one character differences.
542 }
543 }
544
FindRegexVarEnd(StringRef Str,SourceMgr & SM)545 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
546 // Offset keeps track of the current offset within the input Str
547 size_t Offset = 0;
548 // [...] Nesting depth
549 size_t BracketDepth = 0;
550
551 while (!Str.empty()) {
552 if (Str.startswith("]]") && BracketDepth == 0)
553 return Offset;
554 if (Str[0] == '\\') {
555 // Backslash escapes the next char within regexes, so skip them both.
556 Str = Str.substr(2);
557 Offset += 2;
558 } else {
559 switch (Str[0]) {
560 default:
561 break;
562 case '[':
563 BracketDepth++;
564 break;
565 case ']':
566 if (BracketDepth == 0) {
567 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
568 SourceMgr::DK_Error,
569 "missing closing \"]\" for regex variable");
570 exit(1);
571 }
572 BracketDepth--;
573 break;
574 }
575 Str = Str.substr(1);
576 Offset++;
577 }
578 }
579
580 return StringRef::npos;
581 }
582
583
584 //===----------------------------------------------------------------------===//
585 // Check Strings.
586 //===----------------------------------------------------------------------===//
587
588 /// CheckString - This is a check that we found in the input file.
589 struct CheckString {
590 /// Pat - The pattern to match.
591 Pattern Pat;
592
593 /// Prefix - Which prefix name this check matched.
594 StringRef Prefix;
595
596 /// Loc - The location in the match file that the check string was specified.
597 SMLoc Loc;
598
599 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
600 /// as opposed to a CHECK: directive.
601 Check::CheckType CheckTy;
602
603 /// DagNotStrings - These are all of the strings that are disallowed from
604 /// occurring between this match string and the previous one (or start of
605 /// file).
606 std::vector<Pattern> DagNotStrings;
607
608
CheckStringCheckString609 CheckString(const Pattern &P,
610 StringRef S,
611 SMLoc L,
612 Check::CheckType Ty)
613 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
614
615 /// Check - Match check string and its "not strings" and/or "dag strings".
616 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
617 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
618
619 /// CheckNext - Verify there is a single line in the given buffer.
620 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
621
622 /// CheckSame - Verify there is no newline in the given buffer.
623 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
624
625 /// CheckNot - Verify there's no "not strings" in the given buffer.
626 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
627 const std::vector<const Pattern *> &NotStrings,
628 StringMap<StringRef> &VariableTable) const;
629
630 /// CheckDag - Match "dag strings" and their mixed "not strings".
631 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
632 std::vector<const Pattern *> &NotStrings,
633 StringMap<StringRef> &VariableTable) const;
634 };
635
636 /// Canonicalize whitespaces in the input file. Line endings are replaced
637 /// with UNIX-style '\n'.
638 ///
639 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
640 /// characters to a single space.
641 static std::unique_ptr<MemoryBuffer>
CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,bool PreserveHorizontal)642 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
643 bool PreserveHorizontal) {
644 SmallString<128> NewFile;
645 NewFile.reserve(MB->getBufferSize());
646
647 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
648 Ptr != End; ++Ptr) {
649 // Eliminate trailing dosish \r.
650 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
651 continue;
652 }
653
654 // If current char is not a horizontal whitespace or if horizontal
655 // whitespace canonicalization is disabled, dump it to output as is.
656 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
657 NewFile.push_back(*Ptr);
658 continue;
659 }
660
661 // Otherwise, add one space and advance over neighboring space.
662 NewFile.push_back(' ');
663 while (Ptr+1 != End &&
664 (Ptr[1] == ' ' || Ptr[1] == '\t'))
665 ++Ptr;
666 }
667
668 return std::unique_ptr<MemoryBuffer>(
669 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
670 }
671
IsPartOfWord(char c)672 static bool IsPartOfWord(char c) {
673 return (isalnum(c) || c == '-' || c == '_');
674 }
675
676 // Get the size of the prefix extension.
CheckTypeSize(Check::CheckType Ty)677 static size_t CheckTypeSize(Check::CheckType Ty) {
678 switch (Ty) {
679 case Check::CheckNone:
680 return 0;
681
682 case Check::CheckPlain:
683 return sizeof(":") - 1;
684
685 case Check::CheckNext:
686 return sizeof("-NEXT:") - 1;
687
688 case Check::CheckSame:
689 return sizeof("-SAME:") - 1;
690
691 case Check::CheckNot:
692 return sizeof("-NOT:") - 1;
693
694 case Check::CheckDAG:
695 return sizeof("-DAG:") - 1;
696
697 case Check::CheckLabel:
698 return sizeof("-LABEL:") - 1;
699
700 case Check::CheckEOF:
701 llvm_unreachable("Should not be using EOF size");
702 }
703
704 llvm_unreachable("Bad check type");
705 }
706
FindCheckType(StringRef Buffer,StringRef Prefix)707 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
708 char NextChar = Buffer[Prefix.size()];
709
710 // Verify that the : is present after the prefix.
711 if (NextChar == ':')
712 return Check::CheckPlain;
713
714 if (NextChar != '-')
715 return Check::CheckNone;
716
717 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
718 if (Rest.startswith("NEXT:"))
719 return Check::CheckNext;
720
721 if (Rest.startswith("SAME:"))
722 return Check::CheckSame;
723
724 if (Rest.startswith("NOT:"))
725 return Check::CheckNot;
726
727 if (Rest.startswith("DAG:"))
728 return Check::CheckDAG;
729
730 if (Rest.startswith("LABEL:"))
731 return Check::CheckLabel;
732
733 return Check::CheckNone;
734 }
735
736 // From the given position, find the next character after the word.
SkipWord(StringRef Str,size_t Loc)737 static size_t SkipWord(StringRef Str, size_t Loc) {
738 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
739 ++Loc;
740 return Loc;
741 }
742
743 // Try to find the first match in buffer for any prefix. If a valid match is
744 // found, return that prefix and set its type and location. If there are almost
745 // matches (e.g. the actual prefix string is found, but is not an actual check
746 // string), but no valid match, return an empty string and set the position to
747 // resume searching from. If no partial matches are found, return an empty
748 // string and the location will be StringRef::npos. If one prefix is a substring
749 // of another, the maximal match should be found. e.g. if "A" and "AA" are
750 // prefixes then AA-CHECK: should match the second one.
FindFirstCandidateMatch(StringRef & Buffer,Check::CheckType & CheckTy,size_t & CheckLoc)751 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
752 Check::CheckType &CheckTy,
753 size_t &CheckLoc) {
754 StringRef FirstPrefix;
755 size_t FirstLoc = StringRef::npos;
756 size_t SearchLoc = StringRef::npos;
757 Check::CheckType FirstTy = Check::CheckNone;
758
759 CheckTy = Check::CheckNone;
760 CheckLoc = StringRef::npos;
761
762 for (StringRef Prefix : CheckPrefixes) {
763 size_t PrefixLoc = Buffer.find(Prefix);
764
765 if (PrefixLoc == StringRef::npos)
766 continue;
767
768 // Track where we are searching for invalid prefixes that look almost right.
769 // We need to only advance to the first partial match on the next attempt
770 // since a partial match could be a substring of a later, valid prefix.
771 // Need to skip to the end of the word, otherwise we could end up
772 // matching a prefix in a substring later.
773 if (PrefixLoc < SearchLoc)
774 SearchLoc = SkipWord(Buffer, PrefixLoc);
775
776 // We only want to find the first match to avoid skipping some.
777 if (PrefixLoc > FirstLoc)
778 continue;
779 // If one matching check-prefix is a prefix of another, choose the
780 // longer one.
781 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
782 continue;
783
784 StringRef Rest = Buffer.drop_front(PrefixLoc);
785 // Make sure we have actually found the prefix, and not a word containing
786 // it. This should also prevent matching the wrong prefix when one is a
787 // substring of another.
788 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
789 FirstTy = Check::CheckNone;
790 else
791 FirstTy = FindCheckType(Rest, Prefix);
792
793 FirstLoc = PrefixLoc;
794 FirstPrefix = Prefix;
795 }
796
797 // If the first prefix is invalid, we should continue the search after it.
798 if (FirstTy == Check::CheckNone) {
799 CheckLoc = SearchLoc;
800 return "";
801 }
802
803 CheckTy = FirstTy;
804 CheckLoc = FirstLoc;
805 return FirstPrefix;
806 }
807
FindFirstMatchingPrefix(StringRef & Buffer,unsigned & LineNumber,Check::CheckType & CheckTy,size_t & CheckLoc)808 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
809 unsigned &LineNumber,
810 Check::CheckType &CheckTy,
811 size_t &CheckLoc) {
812 while (!Buffer.empty()) {
813 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
814 // If we found a real match, we are done.
815 if (!Prefix.empty()) {
816 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
817 return Prefix;
818 }
819
820 // We didn't find any almost matches either, we are also done.
821 if (CheckLoc == StringRef::npos)
822 return StringRef();
823
824 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
825
826 // Advance to the last possible match we found and try again.
827 Buffer = Buffer.drop_front(CheckLoc + 1);
828 }
829
830 return StringRef();
831 }
832
833 /// ReadCheckFile - Read the check file, which specifies the sequence of
834 /// expected strings. The strings are added to the CheckStrings vector.
835 /// Returns true in case of an error, false otherwise.
ReadCheckFile(SourceMgr & SM,std::vector<CheckString> & CheckStrings)836 static bool ReadCheckFile(SourceMgr &SM,
837 std::vector<CheckString> &CheckStrings) {
838 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
839 MemoryBuffer::getFileOrSTDIN(CheckFilename);
840 if (std::error_code EC = FileOrErr.getError()) {
841 errs() << "Could not open check file '" << CheckFilename
842 << "': " << EC.message() << '\n';
843 return true;
844 }
845
846 // If we want to canonicalize whitespace, strip excess whitespace from the
847 // buffer containing the CHECK lines. Remove DOS style line endings.
848 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
849 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
850
851 // Find all instances of CheckPrefix followed by : in the file.
852 StringRef Buffer = F->getBuffer();
853
854 SM.AddNewSourceBuffer(std::move(F), SMLoc());
855
856 std::vector<Pattern> ImplicitNegativeChecks;
857 for (const auto &PatternString : ImplicitCheckNot) {
858 // Create a buffer with fake command line content in order to display the
859 // command line option responsible for the specific implicit CHECK-NOT.
860 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
861 std::string Suffix = "'";
862 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
863 Prefix + PatternString + Suffix, "command line");
864
865 StringRef PatternInBuffer =
866 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
867 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
868
869 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
870 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
871 "IMPLICIT-CHECK", SM, 0);
872 }
873
874
875 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
876
877 // LineNumber keeps track of the line on which CheckPrefix instances are
878 // found.
879 unsigned LineNumber = 1;
880
881 while (1) {
882 Check::CheckType CheckTy;
883 size_t PrefixLoc;
884
885 // See if a prefix occurs in the memory buffer.
886 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
887 LineNumber,
888 CheckTy,
889 PrefixLoc);
890 if (UsedPrefix.empty())
891 break;
892
893 Buffer = Buffer.drop_front(PrefixLoc);
894
895 // Location to use for error messages.
896 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
897
898 // PrefixLoc is to the start of the prefix. Skip to the end.
899 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
900
901 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
902 // leading and trailing whitespace.
903 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
904
905 // Scan ahead to the end of line.
906 size_t EOL = Buffer.find_first_of("\n\r");
907
908 // Remember the location of the start of the pattern, for diagnostics.
909 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
910
911 // Parse the pattern.
912 Pattern P(CheckTy);
913 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
914 return true;
915
916 // Verify that CHECK-LABEL lines do not define or use variables
917 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
918 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
919 SourceMgr::DK_Error,
920 "found '" + UsedPrefix + "-LABEL:'"
921 " with variable definition or use");
922 return true;
923 }
924
925 Buffer = Buffer.substr(EOL);
926
927 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
928 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
929 CheckStrings.empty()) {
930 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
931 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
932 SourceMgr::DK_Error,
933 "found '" + UsedPrefix + "-" + Type + "' without previous '"
934 + UsedPrefix + ": line");
935 return true;
936 }
937
938 // Handle CHECK-DAG/-NOT.
939 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
940 DagNotMatches.push_back(P);
941 continue;
942 }
943
944 // Okay, add the string we captured to the output vector and move on.
945 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc, CheckTy);
946 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
947 DagNotMatches = ImplicitNegativeChecks;
948 }
949
950 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
951 // prefix as a filler for the error message.
952 if (!DagNotMatches.empty()) {
953 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
954 SMLoc::getFromPointer(Buffer.data()),
955 Check::CheckEOF);
956 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
957 }
958
959 if (CheckStrings.empty()) {
960 errs() << "error: no check strings found with prefix"
961 << (CheckPrefixes.size() > 1 ? "es " : " ");
962 prefix_iterator I = CheckPrefixes.begin();
963 prefix_iterator E = CheckPrefixes.end();
964 if (I != E) {
965 errs() << "\'" << *I << ":'";
966 ++I;
967 }
968 for (; I != E; ++I)
969 errs() << ", \'" << *I << ":'";
970
971 errs() << '\n';
972 return true;
973 }
974
975 return false;
976 }
977
PrintCheckFailed(const SourceMgr & SM,SMLoc Loc,const Pattern & Pat,StringRef Buffer,StringMap<StringRef> & VariableTable)978 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc,
979 const Pattern &Pat, StringRef Buffer,
980 StringMap<StringRef> &VariableTable) {
981 // Otherwise, we have an error, emit an error message.
982 SM.PrintMessage(Loc, SourceMgr::DK_Error,
983 "expected string not found in input");
984
985 // Print the "scanning from here" line. If the current position is at the
986 // end of a line, advance to the start of the next line.
987 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
988
989 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
990 "scanning from here");
991
992 // Allow the pattern to print additional information if desired.
993 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
994 }
995
PrintCheckFailed(const SourceMgr & SM,const CheckString & CheckStr,StringRef Buffer,StringMap<StringRef> & VariableTable)996 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
997 StringRef Buffer,
998 StringMap<StringRef> &VariableTable) {
999 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1000 }
1001
1002 /// CountNumNewlinesBetween - Count the number of newlines in the specified
1003 /// range.
CountNumNewlinesBetween(StringRef Range,const char * & FirstNewLine)1004 static unsigned CountNumNewlinesBetween(StringRef Range,
1005 const char *&FirstNewLine) {
1006 unsigned NumNewLines = 0;
1007 while (1) {
1008 // Scan for newline.
1009 Range = Range.substr(Range.find_first_of("\n\r"));
1010 if (Range.empty()) return NumNewLines;
1011
1012 ++NumNewLines;
1013
1014 // Handle \n\r and \r\n as a single newline.
1015 if (Range.size() > 1 &&
1016 (Range[1] == '\n' || Range[1] == '\r') &&
1017 (Range[0] != Range[1]))
1018 Range = Range.substr(1);
1019 Range = Range.substr(1);
1020
1021 if (NumNewLines == 1)
1022 FirstNewLine = Range.begin();
1023 }
1024 }
1025
Check(const SourceMgr & SM,StringRef Buffer,bool IsLabelScanMode,size_t & MatchLen,StringMap<StringRef> & VariableTable) const1026 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1027 bool IsLabelScanMode, size_t &MatchLen,
1028 StringMap<StringRef> &VariableTable) const {
1029 size_t LastPos = 0;
1030 std::vector<const Pattern *> NotStrings;
1031
1032 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1033 // bounds; we have not processed variable definitions within the bounded block
1034 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1035 // over the block again (including the last CHECK-LABEL) in normal mode.
1036 if (!IsLabelScanMode) {
1037 // Match "dag strings" (with mixed "not strings" if any).
1038 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1039 if (LastPos == StringRef::npos)
1040 return StringRef::npos;
1041 }
1042
1043 // Match itself from the last position after matching CHECK-DAG.
1044 StringRef MatchBuffer = Buffer.substr(LastPos);
1045 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1046 if (MatchPos == StringRef::npos) {
1047 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1048 return StringRef::npos;
1049 }
1050
1051 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1052 // or CHECK-NOT
1053 if (!IsLabelScanMode) {
1054 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1055
1056 // If this check is a "CHECK-NEXT", verify that the previous match was on
1057 // the previous line (i.e. that there is one newline between them).
1058 if (CheckNext(SM, SkippedRegion))
1059 return StringRef::npos;
1060
1061 // If this check is a "CHECK-SAME", verify that the previous match was on
1062 // the same line (i.e. that there is no newline between them).
1063 if (CheckSame(SM, SkippedRegion))
1064 return StringRef::npos;
1065
1066 // If this match had "not strings", verify that they don't exist in the
1067 // skipped region.
1068 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1069 return StringRef::npos;
1070 }
1071
1072 return LastPos + MatchPos;
1073 }
1074
CheckNext(const SourceMgr & SM,StringRef Buffer) const1075 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1076 if (CheckTy != Check::CheckNext)
1077 return false;
1078
1079 // Count the number of newlines between the previous match and this one.
1080 assert(Buffer.data() !=
1081 SM.getMemoryBuffer(
1082 SM.FindBufferContainingLoc(
1083 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1084 "CHECK-NEXT can't be the first check in a file");
1085
1086 const char *FirstNewLine = nullptr;
1087 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1088
1089 if (NumNewLines == 0) {
1090 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1091 "-NEXT: is on the same line as previous match");
1092 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1093 SourceMgr::DK_Note, "'next' match was here");
1094 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1095 "previous match ended here");
1096 return true;
1097 }
1098
1099 if (NumNewLines != 1) {
1100 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1101 "-NEXT: is not on the line after the previous match");
1102 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1103 SourceMgr::DK_Note, "'next' match was here");
1104 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1105 "previous match ended here");
1106 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1107 "non-matching line after previous match is here");
1108 return true;
1109 }
1110
1111 return false;
1112 }
1113
CheckSame(const SourceMgr & SM,StringRef Buffer) const1114 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1115 if (CheckTy != Check::CheckSame)
1116 return false;
1117
1118 // Count the number of newlines between the previous match and this one.
1119 assert(Buffer.data() !=
1120 SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1121 SMLoc::getFromPointer(Buffer.data())))
1122 ->getBufferStart() &&
1123 "CHECK-SAME can't be the first check in a file");
1124
1125 const char *FirstNewLine = nullptr;
1126 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1127
1128 if (NumNewLines != 0) {
1129 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1130 Prefix +
1131 "-SAME: is not on the same line as the previous match");
1132 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1133 "'next' match was here");
1134 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1135 "previous match ended here");
1136 return true;
1137 }
1138
1139 return false;
1140 }
1141
CheckNot(const SourceMgr & SM,StringRef Buffer,const std::vector<const Pattern * > & NotStrings,StringMap<StringRef> & VariableTable) const1142 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1143 const std::vector<const Pattern *> &NotStrings,
1144 StringMap<StringRef> &VariableTable) const {
1145 for (const Pattern *Pat : NotStrings) {
1146 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1147
1148 size_t MatchLen = 0;
1149 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1150
1151 if (Pos == StringRef::npos) continue;
1152
1153 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1154 SourceMgr::DK_Error,
1155 Prefix + "-NOT: string occurred!");
1156 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1157 Prefix + "-NOT: pattern specified here");
1158 return true;
1159 }
1160
1161 return false;
1162 }
1163
CheckDag(const SourceMgr & SM,StringRef Buffer,std::vector<const Pattern * > & NotStrings,StringMap<StringRef> & VariableTable) const1164 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1165 std::vector<const Pattern *> &NotStrings,
1166 StringMap<StringRef> &VariableTable) const {
1167 if (DagNotStrings.empty())
1168 return 0;
1169
1170 size_t LastPos = 0;
1171 size_t StartPos = LastPos;
1172
1173 for (const Pattern &Pat : DagNotStrings) {
1174 assert((Pat.getCheckTy() == Check::CheckDAG ||
1175 Pat.getCheckTy() == Check::CheckNot) &&
1176 "Invalid CHECK-DAG or CHECK-NOT!");
1177
1178 if (Pat.getCheckTy() == Check::CheckNot) {
1179 NotStrings.push_back(&Pat);
1180 continue;
1181 }
1182
1183 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1184
1185 size_t MatchLen = 0, MatchPos;
1186
1187 // CHECK-DAG always matches from the start.
1188 StringRef MatchBuffer = Buffer.substr(StartPos);
1189 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1190 // With a group of CHECK-DAGs, a single mismatching means the match on
1191 // that group of CHECK-DAGs fails immediately.
1192 if (MatchPos == StringRef::npos) {
1193 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1194 return StringRef::npos;
1195 }
1196 // Re-calc it as the offset relative to the start of the original string.
1197 MatchPos += StartPos;
1198
1199 if (!NotStrings.empty()) {
1200 if (MatchPos < LastPos) {
1201 // Reordered?
1202 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1203 SourceMgr::DK_Error,
1204 Prefix + "-DAG: found a match of CHECK-DAG"
1205 " reordering across a CHECK-NOT");
1206 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1207 SourceMgr::DK_Note,
1208 Prefix + "-DAG: the farthest match of CHECK-DAG"
1209 " is found here");
1210 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1211 Prefix + "-NOT: the crossed pattern specified"
1212 " here");
1213 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1214 Prefix + "-DAG: the reordered pattern specified"
1215 " here");
1216 return StringRef::npos;
1217 }
1218 // All subsequent CHECK-DAGs should be matched from the farthest
1219 // position of all precedent CHECK-DAGs (including this one.)
1220 StartPos = LastPos;
1221 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1222 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1223 // region.
1224 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1225 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1226 return StringRef::npos;
1227 // Clear "not strings".
1228 NotStrings.clear();
1229 }
1230
1231 // Update the last position with CHECK-DAG matches.
1232 LastPos = std::max(MatchPos + MatchLen, LastPos);
1233 }
1234
1235 return LastPos;
1236 }
1237
1238 // A check prefix must contain only alphanumeric, hyphens and underscores.
ValidateCheckPrefix(StringRef CheckPrefix)1239 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1240 Regex Validator("^[a-zA-Z0-9_-]*$");
1241 return Validator.match(CheckPrefix);
1242 }
1243
ValidateCheckPrefixes()1244 static bool ValidateCheckPrefixes() {
1245 StringSet<> PrefixSet;
1246
1247 for (StringRef Prefix : CheckPrefixes) {
1248 // Reject empty prefixes.
1249 if (Prefix == "")
1250 return false;
1251
1252 if (!PrefixSet.insert(Prefix).second)
1253 return false;
1254
1255 if (!ValidateCheckPrefix(Prefix))
1256 return false;
1257 }
1258
1259 return true;
1260 }
1261
1262 // I don't think there's a way to specify an initial value for cl::list,
1263 // so if nothing was specified, add the default
AddCheckPrefixIfNeeded()1264 static void AddCheckPrefixIfNeeded() {
1265 if (CheckPrefixes.empty())
1266 CheckPrefixes.push_back("CHECK");
1267 }
1268
main(int argc,char ** argv)1269 int main(int argc, char **argv) {
1270 sys::PrintStackTraceOnErrorSignal();
1271 PrettyStackTraceProgram X(argc, argv);
1272 cl::ParseCommandLineOptions(argc, argv);
1273
1274 if (!ValidateCheckPrefixes()) {
1275 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1276 "start with a letter and contain only alphanumeric characters, "
1277 "hyphens and underscores\n";
1278 return 2;
1279 }
1280
1281 AddCheckPrefixIfNeeded();
1282
1283 SourceMgr SM;
1284
1285 // Read the expected strings from the check file.
1286 std::vector<CheckString> CheckStrings;
1287 if (ReadCheckFile(SM, CheckStrings))
1288 return 2;
1289
1290 // Open the file to check and add it to SourceMgr.
1291 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1292 MemoryBuffer::getFileOrSTDIN(InputFilename);
1293 if (std::error_code EC = FileOrErr.getError()) {
1294 errs() << "Could not open input file '" << InputFilename
1295 << "': " << EC.message() << '\n';
1296 return 2;
1297 }
1298 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1299
1300 if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1301 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1302 return 2;
1303 }
1304
1305 // Remove duplicate spaces in the input file if requested.
1306 // Remove DOS style line endings.
1307 std::unique_ptr<MemoryBuffer> F =
1308 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1309
1310 // Check that we have all of the expected strings, in order, in the input
1311 // file.
1312 StringRef Buffer = F->getBuffer();
1313
1314 SM.AddNewSourceBuffer(std::move(F), SMLoc());
1315
1316 /// VariableTable - This holds all the current filecheck variables.
1317 StringMap<StringRef> VariableTable;
1318
1319 bool hasError = false;
1320
1321 unsigned i = 0, j = 0, e = CheckStrings.size();
1322
1323 while (true) {
1324 StringRef CheckRegion;
1325 if (j == e) {
1326 CheckRegion = Buffer;
1327 } else {
1328 const CheckString &CheckLabelStr = CheckStrings[j];
1329 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1330 ++j;
1331 continue;
1332 }
1333
1334 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1335 size_t MatchLabelLen = 0;
1336 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1337 MatchLabelLen, VariableTable);
1338 if (MatchLabelPos == StringRef::npos) {
1339 hasError = true;
1340 break;
1341 }
1342
1343 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1344 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1345 ++j;
1346 }
1347
1348 for ( ; i != j; ++i) {
1349 const CheckString &CheckStr = CheckStrings[i];
1350
1351 // Check each string within the scanned region, including a second check
1352 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1353 size_t MatchLen = 0;
1354 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1355 VariableTable);
1356
1357 if (MatchPos == StringRef::npos) {
1358 hasError = true;
1359 i = j;
1360 break;
1361 }
1362
1363 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1364 }
1365
1366 if (j == e)
1367 break;
1368 }
1369
1370 return hasError ? 1 : 0;
1371 }
1372