/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #include "src/trace_processor/importers/json/json_trace_tokenizer.h"
18 
19 #include <memory>
20 
21 #include "perfetto/base/build_config.h"
22 #include "perfetto/ext/base/string_utils.h"
23 
24 #include "perfetto/trace_processor/trace_blob_view.h"
25 #include "src/trace_processor/importers/json/json_utils.h"
26 #include "src/trace_processor/sorter/trace_sorter.h"
27 #include "src/trace_processor/storage/stats.h"
28 #include "src/trace_processor/util/status_macros.h"
29 
30 namespace perfetto {
31 namespace trace_processor {
32 
33 namespace {
34 
AppendUnescapedCharacter(char c,bool is_escaping,std::string * key)35 base::Status AppendUnescapedCharacter(char c,
36                                       bool is_escaping,
37                                       std::string* key) {
38   if (is_escaping) {
39     switch (c) {
40       case '"':
41       case '\\':
42       case '/':
43         key->push_back(c);
44         break;
45       case 'b':
46         key->push_back('\b');
47         break;
48       case 'f':
49         key->push_back('\f');
50         break;
51       case 'n':
52         key->push_back('\n');
53         break;
54       case 'r':
55         key->push_back('\r');
56         break;
57       case 't':
58         key->push_back('\t');
59         break;
60       case 'u':
61         // Just pass through \uxxxx escape sequences which JSON supports but is
62         // not worth the effort to parse as we never use them here.
63         key->append("\\u");
64         break;
65       default:
66         return base::ErrStatus("Illegal character in JSON");
67     }
68   } else if (c != '\\') {
69     key->push_back(c);
70   }
71   return base::OkStatus();
72 }
73 
74 enum class ReadStringRes {
75   kEndOfString,
76   kNeedsMoreData,
77   kFatalError,
78 };
ReadOneJsonString(const char * start,const char * end,std::string * key,const char ** next)79 ReadStringRes ReadOneJsonString(const char* start,
80                                 const char* end,
81                                 std::string* key,
82                                 const char** next) {
83   if (start == end) {
84     return ReadStringRes::kNeedsMoreData;
85   }
86   if (*start != '"') {
87     return ReadStringRes::kFatalError;
88   }
89 
90   bool is_escaping = false;
91   for (const char* s = start + 1; s < end; s++) {
92     // Control characters are not allowed in JSON strings.
93     if (iscntrl(*s))
94       return ReadStringRes::kFatalError;
95 
96     // If we get a quote character end of the string.
97     if (*s == '"' && !is_escaping) {
98       *next = s + 1;
99       return ReadStringRes::kEndOfString;
100     }
101 
102     base::Status status = AppendUnescapedCharacter(*s, is_escaping, key);
103     if (!status.ok())
104       return ReadStringRes::kFatalError;
105 
106     // If we're in a string and we see a backslash and the last character was
107     // not a backslash the next character is escaped:
108     is_escaping = *s == '\\' && !is_escaping;
109   }
110   return ReadStringRes::kNeedsMoreData;
111 }
112 
113 enum class SkipValueRes {
114   kEndOfValue,
115   kNeedsMoreData,
116   kFatalError,
117 };
SkipOneJsonValue(const char * start,const char * end,const char ** next)118 SkipValueRes SkipOneJsonValue(const char* start,
119                               const char* end,
120                               const char** next) {
121   uint32_t brace_count = 0;
122   uint32_t bracket_count = 0;
123   for (const char* s = start; s < end; s++) {
124     if (*s == '"') {
125       // Because strings can contain {}[] characters, handle them separately
126       // before anything else.
127       std::string ignored;
128       const char* str_next = nullptr;
129       switch (ReadOneJsonString(s, end, &ignored, &str_next)) {
130         case ReadStringRes::kFatalError:
131           return SkipValueRes::kFatalError;
132         case ReadStringRes::kNeedsMoreData:
133           return SkipValueRes::kNeedsMoreData;
134         case ReadStringRes::kEndOfString:
135           // -1 as the loop body will +1 getting to the correct place.
136           s = str_next - 1;
137           break;
138       }
139       continue;
140     }
141     if (brace_count == 0 && bracket_count == 0 && (*s == ',' || *s == '}')) {
142       // Regardless of a comma or brace, this will be skipped by the caller so
143       // just set it to this character.
144       *next = s;
145       return SkipValueRes::kEndOfValue;
146     }
147     if (*s == '[') {
148       ++bracket_count;
149       continue;
150     }
151     if (*s == ']') {
152       if (bracket_count == 0) {
153         return SkipValueRes::kFatalError;
154       }
155       --bracket_count;
156       continue;
157     }
158     if (*s == '{') {
159       ++brace_count;
160       continue;
161     }
162     if (*s == '}') {
163       if (brace_count == 0) {
164         return SkipValueRes::kFatalError;
165       }
166       --brace_count;
167       continue;
168     }
169   }
170   return SkipValueRes::kNeedsMoreData;
171 }
172 
SetOutAndReturn(const char * ptr,const char ** out)173 base::Status SetOutAndReturn(const char* ptr, const char** out) {
174   *out = ptr;
175   return base::OkStatus();
176 }
177 
178 }  // namespace
179 
ReadOneJsonDict(const char * start,const char * end,base::StringView * value,const char ** next)180 ReadDictRes ReadOneJsonDict(const char* start,
181                             const char* end,
182                             base::StringView* value,
183                             const char** next) {
184   int braces = 0;
185   int square_brackets = 0;
186   const char* dict_begin = nullptr;
187   bool in_string = false;
188   bool is_escaping = false;
189   for (const char* s = start; s < end; s++) {
190     if (isspace(*s) || *s == ',')
191       continue;
192     if (*s == '"' && !is_escaping) {
193       in_string = !in_string;
194       continue;
195     }
196     if (in_string) {
197       // If we're in a string and we see a backslash and the last character was
198       // not a backslash the next character is escaped:
199       is_escaping = *s == '\\' && !is_escaping;
200       // If we're currently parsing a string we should ignore otherwise special
201       // characters:
202       continue;
203     }
204     if (*s == '{') {
205       if (braces == 0)
206         dict_begin = s;
207       braces++;
208       continue;
209     }
210     if (*s == '}') {
211       if (braces <= 0)
212         return ReadDictRes::kEndOfTrace;
213       if (--braces > 0)
214         continue;
215       size_t len = static_cast<size_t>((s + 1) - dict_begin);
216       *value = base::StringView(dict_begin, len);
217       *next = s + 1;
218       return ReadDictRes::kFoundDict;
219     }
220     if (*s == '[') {
221       square_brackets++;
222       continue;
223     }
224     if (*s == ']') {
225       if (square_brackets == 0) {
226         // We've reached the end of [traceEvents] array.
227         // There might be other top level keys in the json (e.g. metadata)
228         // after.
229         *next = s + 1;
230         return ReadDictRes::kEndOfArray;
231       }
232       square_brackets--;
233     }
234   }
235   return ReadDictRes::kNeedsMoreData;
236 }
237 
ReadOneJsonKey(const char * start,const char * end,std::string * key,const char ** next)238 ReadKeyRes ReadOneJsonKey(const char* start,
239                           const char* end,
240                           std::string* key,
241                           const char** next) {
242   enum class NextToken {
243     kStringOrEndOfDict,
244     kColon,
245     kValue,
246   };
247 
248   NextToken next_token = NextToken::kStringOrEndOfDict;
249   for (const char* s = start; s < end; s++) {
250     // Whitespace characters anywhere can be skipped.
251     if (isspace(*s))
252       continue;
253 
254     switch (next_token) {
255       case NextToken::kStringOrEndOfDict: {
256         // If we see a closing brace, that means we've reached the end of the
257         // wrapping dictionary.
258         if (*s == '}') {
259           *next = s + 1;
260           return ReadKeyRes::kEndOfDictionary;
261         }
262 
263         // If we see a comma separator, just ignore it.
264         if (*s == ',')
265           continue;
266 
267         auto res = ReadOneJsonString(s, end, key, &s);
268         if (res == ReadStringRes::kFatalError)
269           return ReadKeyRes::kFatalError;
270         if (res == ReadStringRes::kNeedsMoreData)
271           return ReadKeyRes::kNeedsMoreData;
272 
273         // We need to decrement from the pointer as the loop will increment
274         // it back up.
275         s--;
276         next_token = NextToken::kColon;
277         break;
278       }
279       case NextToken::kColon:
280         if (*s != ':')
281           return ReadKeyRes::kFatalError;
282         next_token = NextToken::kValue;
283         break;
284       case NextToken::kValue:
285         // Allowed value starting chars: [ { digit - "
286         // Also allowed: true, false, null. For simplicities sake, we only check
287         // against the first character as we're not trying to be super accurate.
288         if (*s == '[' || *s == '{' || isdigit(*s) || *s == '-' || *s == '"' ||
289             *s == 't' || *s == 'f' || *s == 'n') {
290           *next = s;
291           return ReadKeyRes::kFoundKey;
292         }
293         return ReadKeyRes::kFatalError;
294     }
295   }
296   return ReadKeyRes::kNeedsMoreData;
297 }
298 
ExtractValueForJsonKey(base::StringView dict,const std::string & key,std::optional<std::string> * value)299 base::Status ExtractValueForJsonKey(base::StringView dict,
300                                     const std::string& key,
301                                     std::optional<std::string>* value) {
302   PERFETTO_DCHECK(dict.size() >= 2);
303 
304   const char* start = dict.data();
305   const char* end = dict.data() + dict.size();
306 
307   enum ExtractValueState {
308     kBeforeDict,
309     kInsideDict,
310     kAfterDict,
311   };
312 
313   ExtractValueState state = kBeforeDict;
314   for (const char* s = start; s < end;) {
315     if (isspace(*s)) {
316       ++s;
317       continue;
318     }
319 
320     if (state == kBeforeDict) {
321       if (*s == '{') {
322         ++s;
323         state = kInsideDict;
324         continue;
325       }
326       return base::ErrStatus("Unexpected character before JSON dict");
327     }
328 
329     if (state == kAfterDict)
330       return base::ErrStatus("Unexpected character after JSON dict");
331 
332     PERFETTO_DCHECK(state == kInsideDict);
333     PERFETTO_DCHECK(s < end);
334 
335     if (*s == '}') {
336       ++s;
337       state = kAfterDict;
338       continue;
339     }
340 
341     std::string current_key;
342     auto res = ReadOneJsonKey(s, end, &current_key, &s);
343     if (res == ReadKeyRes::kEndOfDictionary)
344       break;
345 
346     if (res == ReadKeyRes::kFatalError) {
347       return base::ErrStatus(
348           "Failure parsing JSON: encountered fatal error while parsing key for "
349           "value");
350     }
351 
352     if (res == ReadKeyRes::kNeedsMoreData) {
353       return base::ErrStatus("Failure parsing JSON: partial JSON dictionary");
354     }
355 
356     PERFETTO_DCHECK(res == ReadKeyRes::kFoundKey);
357 
358     if (*s == '[') {
359       return base::ErrStatus(
360           "Failure parsing JSON: unsupported JSON dictionary with array");
361     }
362 
363     std::string value_str;
364     if (*s == '{') {
365       base::StringView dict_str;
366       ReadDictRes dict_res = ReadOneJsonDict(s, end, &dict_str, &s);
367       if (dict_res == ReadDictRes::kNeedsMoreData ||
368           dict_res == ReadDictRes::kEndOfArray ||
369           dict_res == ReadDictRes::kEndOfTrace) {
370         return base::ErrStatus(
371             "Failure parsing JSON: unable to parse dictionary");
372       }
373       value_str = dict_str.ToStdString();
374     } else if (*s == '"') {
375       auto str_res = ReadOneJsonString(s, end, &value_str, &s);
376       if (str_res == ReadStringRes::kNeedsMoreData ||
377           str_res == ReadStringRes::kFatalError) {
378         return base::ErrStatus("Failure parsing JSON: unable to parse string");
379       }
380     } else {
381       const char* value_start = s;
382       const char* value_end = end;
383       for (; s < end; ++s) {
384         if (*s == ',' || isspace(*s) || *s == '}') {
385           value_end = s;
386           break;
387         }
388       }
389       value_str = std::string(value_start, value_end);
390     }
391 
392     if (key == current_key) {
393       *value = value_str;
394       return base::OkStatus();
395     }
396   }
397 
398   if (state != kAfterDict)
399     return base::ErrStatus("Failure parsing JSON: malformed dictionary");
400 
401   *value = std::nullopt;
402   return base::OkStatus();
403 }
404 
ReadOneSystemTraceLine(const char * start,const char * end,std::string * line,const char ** next)405 ReadSystemLineRes ReadOneSystemTraceLine(const char* start,
406                                          const char* end,
407                                          std::string* line,
408                                          const char** next) {
409   bool is_escaping = false;
410   for (const char* s = start; s < end; s++) {
411     // If we get a quote character and we're not escaping, we are done with the
412     // system trace string.
413     if (*s == '"' && !is_escaping) {
414       *next = s + 1;
415       return ReadSystemLineRes::kEndOfSystemTrace;
416     }
417 
418     // If we are escaping n, that means this is a new line which is a delimiter
419     // for a system trace line.
420     if (*s == 'n' && is_escaping) {
421       *next = s + 1;
422       return ReadSystemLineRes::kFoundLine;
423     }
424 
425     base::Status status = AppendUnescapedCharacter(*s, is_escaping, line);
426     if (!status.ok())
427       return ReadSystemLineRes::kFatalError;
428 
429     // If we're in a string and we see a backslash and the last character was
430     // not a backslash the next character is escaped:
431     is_escaping = *s == '\\' && !is_escaping;
432   }
433   return ReadSystemLineRes::kNeedsMoreData;
434 }
435 
JsonTraceTokenizer(TraceProcessorContext * ctx)436 JsonTraceTokenizer::JsonTraceTokenizer(TraceProcessorContext* ctx)
437     : context_(ctx) {}
438 JsonTraceTokenizer::~JsonTraceTokenizer() = default;
439 
Parse(TraceBlobView blob)440 base::Status JsonTraceTokenizer::Parse(TraceBlobView blob) {
441   PERFETTO_DCHECK(json::IsJsonSupported());
442 
443   buffer_.insert(buffer_.end(), blob.data(), blob.data() + blob.size());
444   const char* buf = buffer_.data();
445   const char* next = buf;
446   const char* end = buf + buffer_.size();
447 
448   if (offset_ == 0) {
449     // Strip leading whitespace.
450     while (next != end && isspace(*next)) {
451       next++;
452     }
453     if (next == end) {
454       return base::ErrStatus(
455           "Failure parsing JSON: first chunk has only whitespace");
456     }
457 
458     // Trace could begin in any of these ways:
459     // {"traceEvents":[{
460     // { "traceEvents": [{
461     // [{
462     if (*next != '{' && *next != '[') {
463       return base::ErrStatus(
464           "Failure parsing JSON: first non-whitespace character is not [ or {");
465     }
466 
467     // Figure out the format of the JSON file based on the first non-whitespace
468     // character.
469     format_ = *next == '{' ? TraceFormat::kOuterDictionary
470                            : TraceFormat::kOnlyTraceEvents;
471 
472     // Skip the '[' or '{' character.
473     next++;
474 
475     // Set our current position based on the format of the trace.
476     position_ = format_ == TraceFormat::kOuterDictionary
477                     ? TracePosition::kDictionaryKey
478                     : TracePosition::kInsideTraceEventsArray;
479   }
480   RETURN_IF_ERROR(ParseInternal(next, end, &next));
481 
482   offset_ += static_cast<uint64_t>(next - buf);
483   buffer_.erase(buffer_.begin(), buffer_.begin() + (next - buf));
484   return base::OkStatus();
485 }
486 
ParseInternal(const char * start,const char * end,const char ** out)487 base::Status JsonTraceTokenizer::ParseInternal(const char* start,
488                                                const char* end,
489                                                const char** out) {
490   PERFETTO_DCHECK(json::IsJsonSupported());
491 
492   switch (position_) {
493     case TracePosition::kDictionaryKey:
494       return HandleDictionaryKey(start, end, out);
495     case TracePosition::kInsideSystemTraceEventsString:
496       return HandleSystemTraceEvent(start, end, out);
497     case TracePosition::kInsideTraceEventsArray:
498       return HandleTraceEvent(start, end, out);
499     case TracePosition::kEof: {
500       return start == end
501                  ? base::OkStatus()
502                  : base::ErrStatus(
503                        "Failure parsing JSON: tried to parse data after EOF");
504     }
505   }
506   PERFETTO_FATAL("For GCC");
507 }
508 
HandleTraceEvent(const char * start,const char * end,const char ** out)509 base::Status JsonTraceTokenizer::HandleTraceEvent(const char* start,
510                                                   const char* end,
511                                                   const char** out) {
512   const char* next = start;
513   while (next < end) {
514     base::StringView unparsed;
515     switch (ReadOneJsonDict(next, end, &unparsed, &next)) {
516       case ReadDictRes::kEndOfArray: {
517         if (format_ == TraceFormat::kOnlyTraceEvents) {
518           position_ = TracePosition::kEof;
519           return SetOutAndReturn(next, out);
520         }
521 
522         position_ = TracePosition::kDictionaryKey;
523         return ParseInternal(next, end, out);
524       }
525       case ReadDictRes::kEndOfTrace:
526         position_ = TracePosition::kEof;
527         return SetOutAndReturn(next, out);
528       case ReadDictRes::kNeedsMoreData:
529         return SetOutAndReturn(next, out);
530       case ReadDictRes::kFoundDict:
531         break;
532     }
533 
534     std::optional<std::string> opt_raw_ts;
535     RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ts", &opt_raw_ts));
536     std::optional<int64_t> opt_ts =
537         opt_raw_ts ? json::CoerceToTs(*opt_raw_ts) : std::nullopt;
538     int64_t ts = 0;
539     if (opt_ts.has_value()) {
540       ts = opt_ts.value();
541     } else {
542       // Metadata events may omit ts. In all other cases error:
543       std::optional<std::string> opt_raw_ph;
544       RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ph", &opt_raw_ph));
545       if (!opt_raw_ph || *opt_raw_ph != "M") {
546         context_->storage->IncrementStats(stats::json_tokenizer_failure);
547         continue;
548       }
549     }
550     context_->sorter->PushJsonValue(ts, unparsed.ToStdString());
551   }
552   return SetOutAndReturn(next, out);
553 }
554 
HandleDictionaryKey(const char * start,const char * end,const char ** out)555 base::Status JsonTraceTokenizer::HandleDictionaryKey(const char* start,
556                                                      const char* end,
557                                                      const char** out) {
558   if (format_ != TraceFormat::kOuterDictionary) {
559     return base::ErrStatus(
560         "Failure parsing JSON: illegal format when parsing dictionary key");
561   }
562 
563   const char* next = start;
564   std::string key;
565   switch (ReadOneJsonKey(start, end, &key, &next)) {
566     case ReadKeyRes::kFatalError:
567       return base::ErrStatus(
568           "Failure parsing JSON: encountered fatal error while parsing key");
569     case ReadKeyRes::kEndOfDictionary:
570       position_ = TracePosition::kEof;
571       return SetOutAndReturn(next, out);
572     case ReadKeyRes::kNeedsMoreData:
573       // If we didn't manage to read the key we need to set |out| to |start|
574       // (*not* |next|) to keep the state machine happy.
575       return SetOutAndReturn(start, out);
576     case ReadKeyRes::kFoundKey:
577       break;
578   }
579 
580   // ReadOneJsonKey should ensure that the first character of the value is
581   // available.
582   PERFETTO_CHECK(next < end);
583 
584   if (key == "traceEvents") {
585     // Skip the [ character opening the array.
586     if (*next != '[') {
587       return base::ErrStatus(
588           "Failure parsing JSON: traceEvents is not an array.");
589     }
590     next++;
591 
592     position_ = TracePosition::kInsideTraceEventsArray;
593     return ParseInternal(next, end, out);
594   }
595 
596   if (key == "systemTraceEvents") {
597     // Skip the " character opening the string.
598     if (*next != '"') {
599       return base::ErrStatus(
600           "Failure parsing JSON: systemTraceEvents is not an string.");
601     }
602     next++;
603 
604     position_ = TracePosition::kInsideSystemTraceEventsString;
605     return ParseInternal(next, end, out);
606   }
607 
608   if (key == "displayTimeUnit") {
609     std::string time_unit;
610     auto result = ReadOneJsonString(next, end, &time_unit, &next);
611     if (result == ReadStringRes::kFatalError)
612       return base::ErrStatus("Could not parse displayTimeUnit");
613     context_->storage->IncrementStats(stats::json_display_time_unit);
614     return ParseInternal(next, end, out);
615   }
616 
617   // If we don't know the key for this JSON value just skip it.
618   switch (SkipOneJsonValue(next, end, &next)) {
619     case SkipValueRes::kFatalError:
620       return base::ErrStatus(
621           "Failure parsing JSON: error while parsing value for key %s",
622           key.c_str());
623     case SkipValueRes::kNeedsMoreData:
624       // If we didn't manage to read the key *and* the value, we need to set
625       // |out| to |start| (*not* |next|) to keep the state machine happy (as
626       // we expect to always see a key before the value).
627       return SetOutAndReturn(start, out);
628     case SkipValueRes::kEndOfValue:
629       return ParseInternal(next, end, out);
630   }
631   PERFETTO_FATAL("For GCC");
632 }
633 
HandleSystemTraceEvent(const char * start,const char * end,const char ** out)634 base::Status JsonTraceTokenizer::HandleSystemTraceEvent(const char* start,
635                                                         const char* end,
636                                                         const char** out) {
637   if (format_ != TraceFormat::kOuterDictionary) {
638     return base::ErrStatus(
639         "Failure parsing JSON: illegal format when parsing system events");
640   }
641 
642   const char* next = start;
643   while (next < end) {
644     std::string raw_line;
645     switch (ReadOneSystemTraceLine(next, end, &raw_line, &next)) {
646       case ReadSystemLineRes::kFatalError:
647         return base::ErrStatus(
648             "Failure parsing JSON: encountered fatal error while parsing "
649             "event inside trace event string");
650       case ReadSystemLineRes::kNeedsMoreData:
651         return SetOutAndReturn(next, out);
652       case ReadSystemLineRes::kEndOfSystemTrace:
653         position_ = TracePosition::kDictionaryKey;
654         return ParseInternal(next, end, out);
655       case ReadSystemLineRes::kFoundLine:
656         break;
657     }
658 
659     if (base::StartsWith(raw_line, "#") || raw_line.empty())
660       continue;
661 
662     SystraceLine line;
663     RETURN_IF_ERROR(systrace_line_tokenizer_.Tokenize(raw_line, &line));
664     context_->sorter->PushSystraceLine(std::move(line));
665   }
666   return SetOutAndReturn(next, out);
667 }
668 
NotifyEndOfFile()669 void JsonTraceTokenizer::NotifyEndOfFile() {
670   PERFETTO_DCHECK(position_ == TracePosition::kEof);
671 }
672 
673 }  // namespace trace_processor
674 }  // namespace perfetto
675