//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
18
19 using clang::analyze_format_string::ArgType;
20 using clang::analyze_format_string::FormatStringHandler;
21 using clang::analyze_format_string::LengthModifier;
22 using clang::analyze_format_string::OptionalAmount;
23 using clang::analyze_format_string::ConversionSpecifier;
24 using clang::analyze_scanf::ScanfConversionSpecifier;
25 using clang::analyze_scanf::ScanfSpecifier;
26 using clang::UpdateOnReturn;
27 using namespace clang;
28
29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30 ScanfSpecifierResult;
31
ParseScanList(FormatStringHandler & H,ScanfConversionSpecifier & CS,const char * & Beg,const char * E)32 static bool ParseScanList(FormatStringHandler &H,
33 ScanfConversionSpecifier &CS,
34 const char *&Beg, const char *E) {
35 const char *I = Beg;
36 const char *start = I - 1;
37 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39 // No more characters?
40 if (I == E) {
41 H.HandleIncompleteScanList(start, I);
42 return true;
43 }
44
45 // Special case: ']' is the first character.
46 if (*I == ']') {
47 if (++I == E) {
48 H.HandleIncompleteScanList(start, I - 1);
49 return true;
50 }
51 }
52
53 // Look for a ']' character which denotes the end of the scan list.
54 while (*I != ']') {
55 if (++I == E) {
56 H.HandleIncompleteScanList(start, I - 1);
57 return true;
58 }
59 }
60
61 CS.setEndScanList(I);
62 return false;
63 }
64
65 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66 // We can possibly refactor.
ParseScanfSpecifier(FormatStringHandler & H,const char * & Beg,const char * E,unsigned & argIndex,const LangOptions & LO,const TargetInfo & Target)67 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68 const char *&Beg,
69 const char *E,
70 unsigned &argIndex,
71 const LangOptions &LO,
72 const TargetInfo &Target) {
73
74 using namespace clang::analyze_scanf;
75 const char *I = Beg;
76 const char *Start = 0;
77 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
78
79 // Look for a '%' character that indicates the start of a format specifier.
80 for ( ; I != E ; ++I) {
81 char c = *I;
82 if (c == '\0') {
83 // Detect spurious null characters, which are likely errors.
84 H.HandleNullChar(I);
85 return true;
86 }
87 if (c == '%') {
88 Start = I++; // Record the start of the format specifier.
89 break;
90 }
91 }
92
93 // No format specifier found?
94 if (!Start)
95 return false;
96
97 if (I == E) {
98 // No more characters left?
99 H.HandleIncompleteSpecifier(Start, E - Start);
100 return true;
101 }
102
103 ScanfSpecifier FS;
104 if (ParseArgPosition(H, FS, Start, I, E))
105 return true;
106
107 if (I == E) {
108 // No more characters left?
109 H.HandleIncompleteSpecifier(Start, E - Start);
110 return true;
111 }
112
113 // Look for '*' flag if it is present.
114 if (*I == '*') {
115 FS.setSuppressAssignment(I);
116 if (++I == E) {
117 H.HandleIncompleteSpecifier(Start, E - Start);
118 return true;
119 }
120 }
121
122 // Look for the field width (if any). Unlike printf, this is either
123 // a fixed integer or isn't present.
124 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
125 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
126 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
127 FS.setFieldWidth(Amt);
128
129 if (I == E) {
130 // No more characters left?
131 H.HandleIncompleteSpecifier(Start, E - Start);
132 return true;
133 }
134 }
135
136 // Look for the length modifier.
137 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
138 // No more characters left?
139 H.HandleIncompleteSpecifier(Start, E - Start);
140 return true;
141 }
142
143 // Detect spurious null characters, which are likely errors.
144 if (*I == '\0') {
145 H.HandleNullChar(I);
146 return true;
147 }
148
149 // Finally, look for the conversion specifier.
150 const char *conversionPosition = I++;
151 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
152 switch (*conversionPosition) {
153 default:
154 break;
155 case '%': k = ConversionSpecifier::PercentArg; break;
156 case 'A': k = ConversionSpecifier::AArg; break;
157 case 'E': k = ConversionSpecifier::EArg; break;
158 case 'F': k = ConversionSpecifier::FArg; break;
159 case 'G': k = ConversionSpecifier::GArg; break;
160 case 'X': k = ConversionSpecifier::XArg; break;
161 case 'a': k = ConversionSpecifier::aArg; break;
162 case 'd': k = ConversionSpecifier::dArg; break;
163 case 'e': k = ConversionSpecifier::eArg; break;
164 case 'f': k = ConversionSpecifier::fArg; break;
165 case 'g': k = ConversionSpecifier::gArg; break;
166 case 'i': k = ConversionSpecifier::iArg; break;
167 case 'n': k = ConversionSpecifier::nArg; break;
168 case 'c': k = ConversionSpecifier::cArg; break;
169 case 'C': k = ConversionSpecifier::CArg; break;
170 case 'S': k = ConversionSpecifier::SArg; break;
171 case '[': k = ConversionSpecifier::ScanListArg; break;
172 case 'u': k = ConversionSpecifier::uArg; break;
173 case 'x': k = ConversionSpecifier::xArg; break;
174 case 'o': k = ConversionSpecifier::oArg; break;
175 case 's': k = ConversionSpecifier::sArg; break;
176 case 'p': k = ConversionSpecifier::pArg; break;
177 // Apple extensions
178 // Apple-specific
179 case 'D':
180 if (Target.getTriple().isOSDarwin())
181 k = ConversionSpecifier::DArg;
182 break;
183 case 'O':
184 if (Target.getTriple().isOSDarwin())
185 k = ConversionSpecifier::OArg;
186 break;
187 case 'U':
188 if (Target.getTriple().isOSDarwin())
189 k = ConversionSpecifier::UArg;
190 break;
191 }
192 ScanfConversionSpecifier CS(conversionPosition, k);
193 if (k == ScanfConversionSpecifier::ScanListArg) {
194 if (ParseScanList(H, CS, I, E))
195 return true;
196 }
197 FS.setConversionSpecifier(CS);
198 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
199 && !FS.usesPositionalArg())
200 FS.setArgIndex(argIndex++);
201
202 // FIXME: '%' and '*' doesn't make sense. Issue a warning.
203 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
204
205 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
206 // Assume the conversion takes one argument.
207 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
208 }
209 return ScanfSpecifierResult(Start, FS);
210 }
211
getArgType(ASTContext & Ctx) const212 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
213 const ScanfConversionSpecifier &CS = getConversionSpecifier();
214
215 if (!CS.consumesDataArgument())
216 return ArgType::Invalid();
217
218 switch(CS.getKind()) {
219 // Signed int.
220 case ConversionSpecifier::dArg:
221 case ConversionSpecifier::DArg:
222 case ConversionSpecifier::iArg:
223 switch (LM.getKind()) {
224 case LengthModifier::None:
225 return ArgType::PtrTo(Ctx.IntTy);
226 case LengthModifier::AsChar:
227 return ArgType::PtrTo(ArgType::AnyCharTy);
228 case LengthModifier::AsShort:
229 return ArgType::PtrTo(Ctx.ShortTy);
230 case LengthModifier::AsLong:
231 return ArgType::PtrTo(Ctx.LongTy);
232 case LengthModifier::AsLongLong:
233 case LengthModifier::AsQuad:
234 return ArgType::PtrTo(Ctx.LongLongTy);
235 case LengthModifier::AsIntMax:
236 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
237 case LengthModifier::AsSizeT:
238 // FIXME: ssize_t.
239 return ArgType();
240 case LengthModifier::AsPtrDiff:
241 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
242 case LengthModifier::AsLongDouble:
243 // GNU extension.
244 return ArgType::PtrTo(Ctx.LongLongTy);
245 case LengthModifier::AsAllocate:
246 return ArgType::Invalid();
247 case LengthModifier::AsMAllocate:
248 return ArgType::Invalid();
249 }
250
251 // Unsigned int.
252 case ConversionSpecifier::oArg:
253 case ConversionSpecifier::OArg:
254 case ConversionSpecifier::uArg:
255 case ConversionSpecifier::UArg:
256 case ConversionSpecifier::xArg:
257 case ConversionSpecifier::XArg:
258 switch (LM.getKind()) {
259 case LengthModifier::None:
260 return ArgType::PtrTo(Ctx.UnsignedIntTy);
261 case LengthModifier::AsChar:
262 return ArgType::PtrTo(Ctx.UnsignedCharTy);
263 case LengthModifier::AsShort:
264 return ArgType::PtrTo(Ctx.UnsignedShortTy);
265 case LengthModifier::AsLong:
266 return ArgType::PtrTo(Ctx.UnsignedLongTy);
267 case LengthModifier::AsLongLong:
268 case LengthModifier::AsQuad:
269 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
270 case LengthModifier::AsIntMax:
271 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
272 case LengthModifier::AsSizeT:
273 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
274 case LengthModifier::AsPtrDiff:
275 // FIXME: Unsigned version of ptrdiff_t?
276 return ArgType();
277 case LengthModifier::AsLongDouble:
278 // GNU extension.
279 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
280 case LengthModifier::AsAllocate:
281 return ArgType::Invalid();
282 case LengthModifier::AsMAllocate:
283 return ArgType::Invalid();
284 }
285
286 // Float.
287 case ConversionSpecifier::aArg:
288 case ConversionSpecifier::AArg:
289 case ConversionSpecifier::eArg:
290 case ConversionSpecifier::EArg:
291 case ConversionSpecifier::fArg:
292 case ConversionSpecifier::FArg:
293 case ConversionSpecifier::gArg:
294 case ConversionSpecifier::GArg:
295 switch (LM.getKind()) {
296 case LengthModifier::None:
297 return ArgType::PtrTo(Ctx.FloatTy);
298 case LengthModifier::AsLong:
299 return ArgType::PtrTo(Ctx.DoubleTy);
300 case LengthModifier::AsLongDouble:
301 return ArgType::PtrTo(Ctx.LongDoubleTy);
302 default:
303 return ArgType::Invalid();
304 }
305
306 // Char, string and scanlist.
307 case ConversionSpecifier::cArg:
308 case ConversionSpecifier::sArg:
309 case ConversionSpecifier::ScanListArg:
310 switch (LM.getKind()) {
311 case LengthModifier::None:
312 return ArgType::PtrTo(ArgType::AnyCharTy);
313 case LengthModifier::AsLong:
314 return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
315 case LengthModifier::AsAllocate:
316 case LengthModifier::AsMAllocate:
317 return ArgType::PtrTo(ArgType::CStrTy);
318 default:
319 return ArgType::Invalid();
320 }
321 case ConversionSpecifier::CArg:
322 case ConversionSpecifier::SArg:
323 // FIXME: Mac OS X specific?
324 switch (LM.getKind()) {
325 case LengthModifier::None:
326 return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
327 case LengthModifier::AsAllocate:
328 case LengthModifier::AsMAllocate:
329 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
330 default:
331 return ArgType::Invalid();
332 }
333
334 // Pointer.
335 case ConversionSpecifier::pArg:
336 return ArgType::PtrTo(ArgType::CPointerTy);
337
338 // Write-back.
339 case ConversionSpecifier::nArg:
340 switch (LM.getKind()) {
341 case LengthModifier::None:
342 return ArgType::PtrTo(Ctx.IntTy);
343 case LengthModifier::AsChar:
344 return ArgType::PtrTo(Ctx.SignedCharTy);
345 case LengthModifier::AsShort:
346 return ArgType::PtrTo(Ctx.ShortTy);
347 case LengthModifier::AsLong:
348 return ArgType::PtrTo(Ctx.LongTy);
349 case LengthModifier::AsLongLong:
350 case LengthModifier::AsQuad:
351 return ArgType::PtrTo(Ctx.LongLongTy);
352 case LengthModifier::AsIntMax:
353 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
354 case LengthModifier::AsSizeT:
355 return ArgType(); // FIXME: ssize_t
356 case LengthModifier::AsPtrDiff:
357 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
358 case LengthModifier::AsLongDouble:
359 return ArgType(); // FIXME: Is this a known extension?
360 case LengthModifier::AsAllocate:
361 case LengthModifier::AsMAllocate:
362 return ArgType::Invalid();
363 }
364
365 default:
366 break;
367 }
368
369 return ArgType();
370 }
371
fixType(QualType QT,const LangOptions & LangOpt,ASTContext & Ctx)372 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
373 ASTContext &Ctx) {
374 if (!QT->isPointerType())
375 return false;
376
377 // %n is different from other conversion specifiers; don't try to fix it.
378 if (CS.getKind() == ConversionSpecifier::nArg)
379 return false;
380
381 QualType PT = QT->getPointeeType();
382
383 // If it's an enum, get its underlying type.
384 if (const EnumType *ETy = QT->getAs<EnumType>())
385 QT = ETy->getDecl()->getIntegerType();
386
387 const BuiltinType *BT = PT->getAs<BuiltinType>();
388 if (!BT)
389 return false;
390
391 // Pointer to a character.
392 if (PT->isAnyCharacterType()) {
393 CS.setKind(ConversionSpecifier::sArg);
394 if (PT->isWideCharType())
395 LM.setKind(LengthModifier::AsWideChar);
396 else
397 LM.setKind(LengthModifier::None);
398 return true;
399 }
400
401 // Figure out the length modifier.
402 switch (BT->getKind()) {
403 // no modifier
404 case BuiltinType::UInt:
405 case BuiltinType::Int:
406 case BuiltinType::Float:
407 LM.setKind(LengthModifier::None);
408 break;
409
410 // hh
411 case BuiltinType::Char_U:
412 case BuiltinType::UChar:
413 case BuiltinType::Char_S:
414 case BuiltinType::SChar:
415 LM.setKind(LengthModifier::AsChar);
416 break;
417
418 // h
419 case BuiltinType::Short:
420 case BuiltinType::UShort:
421 LM.setKind(LengthModifier::AsShort);
422 break;
423
424 // l
425 case BuiltinType::Long:
426 case BuiltinType::ULong:
427 case BuiltinType::Double:
428 LM.setKind(LengthModifier::AsLong);
429 break;
430
431 // ll
432 case BuiltinType::LongLong:
433 case BuiltinType::ULongLong:
434 LM.setKind(LengthModifier::AsLongLong);
435 break;
436
437 // L
438 case BuiltinType::LongDouble:
439 LM.setKind(LengthModifier::AsLongDouble);
440 break;
441
442 // Don't know.
443 default:
444 return false;
445 }
446
447 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
448 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
449 namedTypeToLengthModifier(PT, LM);
450
451 // If fixing the length modifier was enough, we are done.
452 if (hasValidLengthModifier(Ctx.getTargetInfo())) {
453 const analyze_scanf::ArgType &AT = getArgType(Ctx);
454 if (AT.isValid() && AT.matchesType(Ctx, QT))
455 return true;
456 }
457
458 // Figure out the conversion specifier.
459 if (PT->isRealFloatingType())
460 CS.setKind(ConversionSpecifier::fArg);
461 else if (PT->isSignedIntegerType())
462 CS.setKind(ConversionSpecifier::dArg);
463 else if (PT->isUnsignedIntegerType())
464 CS.setKind(ConversionSpecifier::uArg);
465 else
466 llvm_unreachable("Unexpected type");
467
468 return true;
469 }
470
toString(raw_ostream & os) const471 void ScanfSpecifier::toString(raw_ostream &os) const {
472 os << "%";
473
474 if (usesPositionalArg())
475 os << getPositionalArgIndex() << "$";
476 if (SuppressAssignment)
477 os << "*";
478
479 FieldWidth.toString(os);
480 os << LM.toString();
481 os << CS.toString();
482 }
483
ParseScanfString(FormatStringHandler & H,const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target)484 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
485 const char *I,
486 const char *E,
487 const LangOptions &LO,
488 const TargetInfo &Target) {
489
490 unsigned argIndex = 0;
491
492 // Keep looking for a format specifier until we have exhausted the string.
493 while (I != E) {
494 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
495 LO, Target);
496 // Did a fail-stop error of any kind occur when parsing the specifier?
497 // If so, don't do any more processing.
498 if (FSR.shouldStop())
499 return true;
500 // Did we exhaust the string or encounter an error that
501 // we can recover from?
502 if (!FSR.hasValue())
503 continue;
504 // We have a format specifier. Pass it to the callback.
505 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
506 I - FSR.getStart())) {
507 return true;
508 }
509 }
510 assert(I == E && "Format string not exhausted");
511 return false;
512 }
513