1 /*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "bmhParser.h"
9
10 #include "SkCommandLineFlags.h"
11 #include "SkOSFile.h"
12 #include "SkOSPath.h"
13
14 /*
15 things to do
16 if cap word is beginning of sentence, add it to table as lower-case
17 word must have only a single initial capital
18
19 if word is camel cased, look for :: matches on suffix
20
21 when function crosses lines, whole thing isn't seen as a 'word' e.g., search for largeArc in path
22
23 words in external not seen
24
25 look for x-bit but allow x bits
26
27 don't treat 'pos' or 'glyphs' as spell-checkable as in 'RunBuffer.pos' or 'RunBuffer.glyphs'
28 */
29
// Bookkeeping for one distinct word (or code-like token) recorded by SpellCheck.
// Every member has a default so that an entry is never read with an indeterminate value.
struct CheckEntry {
    std::string fFile;       // file name stored when the word was recorded
    int fLine = 0;           // line number stored alongside fFile
    int fCount = 0;          // number of times the word has been recorded
    bool fOverride = false;  // copied from SpellCheck::fOverride; report() skips entries where set
};
36
37 class SpellCheck : public ParserCommon {
38 public:
SpellCheck(const BmhParser & bmh)39 SpellCheck(const BmhParser& bmh) : ParserCommon()
40 , fBmhParser(bmh) {
41 this->reset();
42 }
43 bool check(const char* match);
44 void report(SkCommandLineFlags::StringArray report);
45 private:
46 enum class TableState {
47 kNone,
48 kRow,
49 kColumn,
50 };
51
52 enum class PrintCheck {
53 kWordsOnly,
54 kAllowNumbers,
55 };
56
57 bool check(Definition* );
58 bool checkable(MarkType markType);
59 void childCheck(Definition* def, const char* start);
60 void leafCheck(const char* start, const char* end);
parseFromFile(const char * path)61 bool parseFromFile(const char* path) override { return true; }
62 void printCheck(string str, PrintCheck);
63
reset()64 void reset() override {
65 INHERITED::resetCommon();
66 fMethod = nullptr;
67 fRoot = nullptr;
68 fInCode = false;
69 fInConst = false;
70 fInFormula = false;
71 fInDescription = false;
72 fInStdOut = false;
73 fOverride = false;
74 }
75
76 void wordCheck(string str);
77 void wordCheck(ptrdiff_t len, const char* ch);
78
79 unordered_map<string, CheckEntry> fCode;
80 unordered_map<string, CheckEntry> fColons;
81 unordered_map<string, CheckEntry> fDigits;
82 unordered_map<string, CheckEntry> fDots;
83 unordered_map<string, CheckEntry> fParens; // also hold destructors, operators
84 unordered_map<string, CheckEntry> fUnderscores;
85 unordered_map<string, CheckEntry> fWords;
86 const BmhParser& fBmhParser;
87 Definition* fMethod;
88 RootDefinition* fRoot;
89 int fLocalLine;
90 bool fInCode;
91 bool fInConst;
92 bool fInDescription;
93 bool fInFormula;
94 bool fInStdOut;
95 bool fOverride;
96 typedef ParserCommon INHERITED;
97 };
98
/* This doesn't perform a traditional spell or grammar check, although
   maybe it should. Instead it looks for words used uncommonly and lower
   case words that match capitalized words that are not sentence starters.
   It also looks for articles preceding capitalized words and their
   modifiers to try to maintain a consistent voice.
   Maybe also look for passive verbs (e.g. 'is') and suggest active ones?
 */
spellCheck(const char * match,SkCommandLineFlags::StringArray report) const106 void BmhParser::spellCheck(const char* match, SkCommandLineFlags::StringArray report) const {
107 SpellCheck checker(*this);
108 checker.check(match);
109 checker.report(report);
110 }
111
spellStatus(const char * statusFile,SkCommandLineFlags::StringArray report) const112 void BmhParser::spellStatus(const char* statusFile, SkCommandLineFlags::StringArray report) const {
113 SpellCheck checker(*this);
114 StatusIter iter(statusFile, ".bmh", StatusFilter::kInProgress);
115 string file;
116 iter.next(&file, nullptr);
117 string match = iter.baseDir();
118 checker.check(match.c_str());
119 checker.report(report);
120 }
121
check(const char * match)122 bool SpellCheck::check(const char* match) {
123 for (const auto& topic : fBmhParser.fTopicMap) {
124 Definition* topicDef = topic.second;
125 if (topicDef->fParent) {
126 continue;
127 }
128 if (!topicDef->isRoot()) {
129 return this->reportError<bool>("expected root topic");
130 }
131 fRoot = topicDef->asRoot();
132 if (string::npos == fRoot->fFileName.rfind(match)) {
133 continue;
134 }
135 fOverride = string::npos != fRoot->fFileName.rfind("undocumented.bmh")
136 || string::npos != fRoot->fFileName.rfind("markup.bmh")
137 || string::npos != fRoot->fFileName.rfind("usingBookmaker.bmh");
138 this->check(topicDef);
139 }
140 return true;
141 }
142
// Returns true if every character of str is a lower-case letter; an empty string
// yields true. Characters are widened through unsigned char because passing a
// negative char (any non-ASCII byte) to islower() is undefined behavior.
static bool all_lower(const std::string& str) {
    for (const char c : str) {
        if (!islower(static_cast<unsigned char>(c))) {
            return false;
        }
    }
    return true;
}
151
check(Definition * def)152 bool SpellCheck::check(Definition* def) {
153 fFileName = def->fFileName;
154 fLineCount = def->fLineCount;
155 string printable = def->printableName();
156 const char* textStart = def->fContentStart;
157 switch (def->fMarkType) {
158 case MarkType::kAlias:
159 break;
160 case MarkType::kAnchor:
161 break;
162 case MarkType::kBug:
163 break;
164 case MarkType::kClass:
165 this->wordCheck(def->fName);
166 break;
167 case MarkType::kCode:
168 fInCode = true;
169 break;
170 case MarkType::kColumn:
171 break;
172 case MarkType::kComment:
173 break;
174 case MarkType::kConst: {
175 fInConst = true;
176 this->wordCheck(def->fName);
177 const char* lineEnd = strchr(textStart, '\n');
178 this->wordCheck(lineEnd - textStart, textStart);
179 textStart = lineEnd;
180 } break;
181 case MarkType::kDefine:
182 break;
183 case MarkType::kDescription:
184 fInDescription = true;
185 break;
186 case MarkType::kDetails:
187 break;
188 case MarkType::kDuration:
189 break;
190 case MarkType::kEnum:
191 case MarkType::kEnumClass:
192 this->wordCheck(def->fName);
193 break;
194 case MarkType::kExample:
195 break;
196 case MarkType::kExternal:
197 break;
198 case MarkType::kFile:
199 break;
200 case MarkType::kFilter:
201 break;
202 case MarkType::kFormula:
203 fInFormula = true;
204 break;
205 case MarkType::kFunction:
206 break;
207 case MarkType::kHeight:
208 break;
209 case MarkType::kIllustration:
210 break;
211 case MarkType::kImage:
212 break;
213 case MarkType::kIn:
214 break;
215 case MarkType::kLegend:
216 break;
217 case MarkType::kLine:
218 break;
219 case MarkType::kLink:
220 break;
221 case MarkType::kList:
222 break;
223 case MarkType::kLiteral:
224 break;
225 case MarkType::kMarkChar:
226 break;
227 case MarkType::kMember:
228 break;
229 case MarkType::kMethod: {
230 string method_name = def->methodName();
231 if (all_lower(method_name)) {
232 method_name += "()";
233 }
234 if (!def->isClone() && Definition::MethodType::kOperator != def->fMethodType) {
235 this->wordCheck(method_name);
236 }
237 fMethod = def;
238 } break;
239 case MarkType::kNoExample:
240 break;
241 case MarkType::kNoJustify:
242 break;
243 case MarkType::kOutdent:
244 break;
245 case MarkType::kParam: {
246 TextParser paramParser(def->fFileName, def->fStart, def->fContentStart,
247 def->fLineCount);
248 paramParser.skipWhiteSpace();
249 SkASSERT(paramParser.startsWith("#Param"));
250 paramParser.next(); // skip hash
251 paramParser.skipToNonName(); // skip Param
252 paramParser.skipSpace();
253 const char* paramName = paramParser.fChar;
254 paramParser.skipToSpace();
255 fInCode = true;
256 this->wordCheck(paramParser.fChar - paramName, paramName);
257 fInCode = false;
258 } break;
259 case MarkType::kPhraseDef:
260 break;
261 case MarkType::kPhraseParam:
262 break;
263 case MarkType::kPhraseRef:
264 break;
265 case MarkType::kPlatform:
266 break;
267 case MarkType::kPopulate:
268 break;
269 case MarkType::kReturn:
270 break;
271 case MarkType::kRow:
272 break;
273 case MarkType::kSeeAlso:
274 break;
275 case MarkType::kSet:
276 break;
277 case MarkType::kStdOut: {
278 fInStdOut = true;
279 TextParser code(def);
280 code.skipSpace();
281 while (!code.eof()) {
282 const char* end = code.trimmedLineEnd();
283 this->wordCheck(end - code.fChar, code.fChar);
284 code.skipToLineStart();
285 }
286 fInStdOut = false;
287 } break;
288 case MarkType::kStruct:
289 fRoot = def->asRoot();
290 this->wordCheck(def->fName);
291 break;
292 case MarkType::kSubstitute:
293 break;
294 case MarkType::kSubtopic:
295 // TODO: add a tag that allows subtopic labels in illustrations to skip spellcheck?
296 if (string::npos == fFileName.find("illustrations.bmh")) {
297 this->printCheck(printable, PrintCheck::kAllowNumbers);
298 }
299 break;
300 case MarkType::kTable:
301 break;
302 case MarkType::kTemplate:
303 break;
304 case MarkType::kText:
305 break;
306 case MarkType::kToDo:
307 break;
308 case MarkType::kTopic:
309 this->printCheck(printable, PrintCheck::kWordsOnly);
310 break;
311 case MarkType::kTypedef:
312 break;
313 case MarkType::kUnion:
314 break;
315 case MarkType::kVolatile:
316 break;
317 case MarkType::kWidth:
318 break;
319 default:
320 SkASSERT(0); // handle everything
321 break;
322 }
323 this->childCheck(def, textStart);
324 switch (def->fMarkType) { // post child work, at least for tables
325 case MarkType::kCode:
326 fInCode = false;
327 break;
328 case MarkType::kColumn:
329 break;
330 case MarkType::kDescription:
331 fInDescription = false;
332 break;
333 case MarkType::kEnum:
334 case MarkType::kEnumClass:
335 break;
336 case MarkType::kExample:
337 break;
338 case MarkType::kFormula:
339 fInFormula = false;
340 break;
341 case MarkType::kLegend:
342 break;
343 case MarkType::kMethod:
344 fMethod = nullptr;
345 break;
346 case MarkType::kConst:
347 fInConst = false;
348 case MarkType::kParam:
349 break;
350 case MarkType::kReturn:
351 case MarkType::kSeeAlso:
352 break;
353 case MarkType::kRow:
354 break;
355 case MarkType::kStruct:
356 fRoot = fRoot->rootParent();
357 break;
358 case MarkType::kTable:
359 break;
360 default:
361 break;
362 }
363 return true;
364 }
365
checkable(MarkType markType)366 bool SpellCheck::checkable(MarkType markType) {
367 return Resolvable::kYes == fBmhParser.kMarkProps[(int) markType].fResolve;
368 }
369
childCheck(Definition * def,const char * start)370 void SpellCheck::childCheck(Definition* def, const char* start) {
371 const char* end;
372 fLineCount = def->fLineCount;
373 if (def->isRoot()) {
374 fRoot = def->asRoot();
375 }
376 for (auto& child : def->fChildren) {
377 end = child->fStart;
378 if (this->checkable(def->fMarkType)) {
379 this->leafCheck(start, end);
380 }
381 this->check(child);
382 start = child->fTerminator;
383 }
384 if (this->checkable(def->fMarkType)) {
385 end = def->fContentEnd;
386 this->leafCheck(start, end);
387 }
388 }
389
leafCheck(const char * start,const char * end)390 void SpellCheck::leafCheck(const char* start, const char* end) {
391 const char* chPtr = start;
392 int inAngles = 0;
393 int inParens = 0;
394 bool inQuotes = false;
395 bool allLower = true;
396 char prePriorCh = 0;
397 char priorCh = 0;
398 char lastCh = 0;
399 const char* wordStart = nullptr;
400 const char* wordEnd = nullptr;
401 const char* possibleEnd = nullptr;
402 fLocalLine = 0;
403 do {
404 if (wordStart && wordEnd) {
405 if (!allLower || (!inQuotes && '\"' != lastCh && !inParens
406 && ')' != lastCh && !inAngles && '>' != lastCh)) {
407 string word(wordStart, (possibleEnd ? possibleEnd : wordEnd) - wordStart);
408 if ("e" != word || !isdigit(prePriorCh) || ('+' != lastCh &&
409 '-' != lastCh && !isdigit(lastCh))) {
410 this->wordCheck(word);
411 }
412 }
413 wordStart = nullptr;
414 }
415 if (chPtr == end) {
416 break;
417 }
418 switch (*chPtr) {
419 case '>':
420 if (isalpha(lastCh)) {
421 --inAngles;
422 SkASSERT(inAngles >= 0);
423 }
424 wordEnd = chPtr;
425 break;
426 case '(':
427 ++inParens;
428 possibleEnd = chPtr;
429 break;
430 case ')':
431 --inParens;
432 if ('(' == lastCh) {
433 wordEnd = chPtr + 1;
434 } else {
435 wordEnd = chPtr;
436 }
437 SkASSERT(inParens >= 0 || fInStdOut);
438 break;
439 case '\"':
440 inQuotes = !inQuotes;
441 wordEnd = chPtr;
442 SkASSERT(inQuotes == !wordStart);
443 break;
444 case 'A': case 'B': case 'C': case 'D': case 'E':
445 case 'F': case 'G': case 'H': case 'I': case 'J':
446 case 'K': case 'L': case 'M': case 'N': case 'O':
447 case 'P': case 'Q': case 'R': case 'S': case 'T':
448 case 'U': case 'V': case 'W': case 'X': case 'Y':
449 case 'Z':
450 allLower = false;
451 case 'a': case 'b': case 'c': case 'd': case 'e':
452 case 'f': case 'g': case 'h': case 'i': case 'j':
453 case 'k': case 'l': case 'm': case 'n': case 'o':
454 case 'p': case 'q': case 'r': case 's': case 't':
455 case 'u': case 'v': case 'w': case 'x': case 'y':
456 case 'z':
457 if (!wordStart) {
458 wordStart = chPtr;
459 wordEnd = nullptr;
460 possibleEnd = nullptr;
461 allLower = 'a' <= *chPtr;
462 if ('<' == lastCh || ('<' == priorCh && '/' == lastCh)) {
463 ++inAngles;
464 }
465 }
466 break;
467 case '0': case '1': case '2': case '3': case '4':
468 case '5': case '6': case '7': case '8': case '9':
469 case '_':
470 allLower = false;
471 case '-': // note that dash doesn't clear allLower
472 break;
473 case '!':
474 if (!inQuotes) {
475 wordEnd = chPtr;
476 }
477 break;
478 case '\n':
479 ++fLocalLine;
480 // fall through
481 default:
482 wordEnd = chPtr;
483 break;
484 }
485 prePriorCh = priorCh;
486 priorCh = lastCh;
487 lastCh = *chPtr;
488 } while (++chPtr <= end);
489 }
490
printCheck(string str,PrintCheck allowed)491 void SpellCheck::printCheck(string str, PrintCheck allowed) {
492 string word;
493 for (std::stringstream stream(str); stream >> word; ) {
494 if (PrintCheck::kAllowNumbers == allowed && (std::isdigit(word.back()) || 'x' == word.back())) {
495 // allow ###x for RGB_888x
496 if ((size_t) std::count_if(word.begin(), word.end() - 1,
497 [](unsigned char c){ return std::isdigit(c); } ) == word.length() - 1) {
498 continue;
499 }
500 }
501 wordCheck(word);
502 }
503 }
504
stringCompare(const std::pair<string,CheckEntry> & i,const std::pair<string,CheckEntry> & j)505 static bool stringCompare(const std::pair<string, CheckEntry>& i, const std::pair<string, CheckEntry>& j) {
506 return i.first.compare(j.first) < 0;
507 }
508
report(SkCommandLineFlags::StringArray report)509 void SpellCheck::report(SkCommandLineFlags::StringArray report) {
510 vector<std::pair<string, CheckEntry>> elems(fWords.begin(), fWords.end());
511 std::sort(elems.begin(), elems.end(), stringCompare);
512 if (report.contains("once")) {
513 for (auto iter : elems) {
514 if (iter.second.fOverride) {
515 continue;
516 }
517 if (iter.second.fCount == 1) {
518 string fullName = this->ReportFilename(iter.second.fFile);
519 SkDebugf("%s(%d): %s\n", fullName.c_str(), iter.second.fLine,
520 iter.first.c_str());
521 }
522 }
523 SkDebugf("\n");
524 return;
525 }
526 if (report.contains("all")) {
527 int column = 0;
528 char lastInitial = 'a';
529 int count = 0;
530 for (auto iter : elems) {
531 if (iter.second.fOverride) {
532 continue;
533 }
534 string check = iter.first.c_str();
535 bool allLower = true;
536 for (auto c : check) {
537 if (isupper(c)) {
538 allLower = false;
539 break;
540 }
541 }
542 if (!allLower) {
543 continue;
544 }
545 if (column + check.length() > 100 || check[0] != lastInitial) {
546 SkDebugf("\n");
547 column = 0;
548 }
549 if (check[0] != lastInitial) {
550 SkDebugf("\n");
551 lastInitial = check[0];
552 }
553 SkDebugf("%s ", check.c_str());
554 column += check.length();
555 ++count;
556 }
557 SkDebugf("\n\ncount = %d\n", count);
558 return;
559 }
560 int index = 0;
561 const char* mispelled = report[0];
562 for (auto iter : elems) {
563 if (iter.second.fOverride) {
564 continue;
565 }
566 string check = iter.first.c_str();
567 while (check.compare(mispelled) > 0) {
568 SkDebugf("%s not found\n", mispelled);
569 if (report.count() == ++index) {
570 break;
571 }
572 }
573 if (report.count() == index) {
574 break;
575 }
576 if (check.compare(mispelled) == 0) {
577 string fullName = this->ReportFilename(iter.second.fFile);
578 SkDebugf("%s(%d): %s\n", fullName.c_str(), iter.second.fLine,
579 iter.first.c_str());
580 if (report.count() == ++index) {
581 break;
582 }
583 }
584 }
585 }
586
wordCheck(string str)587 void SpellCheck::wordCheck(string str) {
588 if ("nullptr" == str) {
589 return; // doesn't seem worth it, treating nullptr as a word in need of correction
590 }
591 bool hasColon = false;
592 bool hasDot = false;
593 bool hasParen = false;
594 bool hasUnderscore = false;
595 bool sawDash = false;
596 bool sawDigit = false;
597 bool sawSpecial = false;
598 SkASSERT(str.length() > 0);
599 SkASSERT(isalpha(str[0]) || '~' == str[0]);
600 for (char ch : str) {
601 if (isalpha(ch) || '-' == ch) {
602 sawDash |= '-' == ch;
603 continue;
604 }
605 bool isColon = ':' == ch;
606 hasColon |= isColon;
607 bool isDot = '.' == ch;
608 hasDot |= isDot;
609 bool isParen = '(' == ch || ')' == ch || '~' == ch || '=' == ch || '!' == ch ||
610 '[' == ch || ']' == ch;
611 hasParen |= isParen;
612 bool isUnderscore = '_' == ch;
613 hasUnderscore |= isUnderscore;
614 if (isColon || isDot || isUnderscore || isParen) {
615 continue;
616 }
617 if (isdigit(ch)) {
618 sawDigit = true;
619 continue;
620 }
621 if ('&' == ch || ',' == ch || ' ' == ch) {
622 sawSpecial = true;
623 continue;
624 }
625 SkASSERT(0);
626 }
627 if (sawSpecial && !hasParen) {
628 SkASSERT(0);
629 }
630 bool inCode = fInCode;
631 if (hasUnderscore && isupper(str[0]) && ('S' != str[0] || 'K' != str[1])
632 && !hasColon && !hasDot && !hasParen && !fInStdOut && !inCode && !fInConst
633 && !sawDigit && !sawSpecial && !sawDash) {
634 std::istringstream ss(str);
635 string token;
636 while (std::getline(ss, token, '_')) {
637 if (token.length()) {
638 this->wordCheck(token);
639 }
640 }
641 return;
642 }
643 if (!hasColon && !hasDot && !hasParen && !hasUnderscore
644 && !fInStdOut && !inCode && !fInConst && !sawDigit
645 && islower(str[0]) && isupper(str[1])) {
646 inCode = true;
647 }
648 bool methodParam = false;
649 if (fMethod) {
650 for (auto child : fMethod->fChildren) {
651 if (MarkType::kParam == child->fMarkType && str == child->fName) {
652 methodParam = true;
653 break;
654 }
655 }
656 }
657 auto& mappy = hasColon ? fColons :
658 hasDot ? fDots :
659 hasParen ? fParens :
660 hasUnderscore ? fUnderscores :
661 fInStdOut || fInFormula || inCode || fInConst || methodParam ? fCode :
662 sawDigit ? fDigits : fWords;
663 auto iter = mappy.find(str);
664 if (mappy.end() != iter) {
665 if (iter->second.fOverride && !fOverride) {
666 iter->second.fFile = fFileName;
667 iter->second.fLine = fLineCount + fLocalLine;
668 iter->second.fOverride = false;
669 }
670 iter->second.fCount += 1;
671 } else {
672 CheckEntry* entry = &mappy[str];
673 entry->fFile = fFileName;
674 entry->fLine = fLineCount + fLocalLine;
675 entry->fCount = 1;
676 entry->fOverride = fOverride;
677 }
678 }
679
wordCheck(ptrdiff_t len,const char * ch)680 void SpellCheck::wordCheck(ptrdiff_t len, const char* ch) {
681 leafCheck(ch, ch + len);
682 }
683