• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/importers/json/json_trace_tokenizer.h"
18 
19 #include <memory>
20 
21 #include "perfetto/base/build_config.h"
22 #include "perfetto/ext/base/string_utils.h"
23 
24 #include "perfetto/trace_processor/trace_blob_view.h"
25 #include "src/trace_processor/importers/json/json_utils.h"
26 #include "src/trace_processor/storage/stats.h"
27 #include "src/trace_processor/trace_sorter.h"
28 #include "src/trace_processor/util/status_macros.h"
29 
30 namespace perfetto {
31 namespace trace_processor {
32 
33 namespace {
34 
AppendUnescapedCharacter(char c,bool is_escaping,std::string * key)35 util::Status AppendUnescapedCharacter(char c,
36                                       bool is_escaping,
37                                       std::string* key) {
38   if (is_escaping) {
39     switch (c) {
40       case '"':
41       case '\\':
42       case '/':
43         key->push_back(c);
44         break;
45       case 'b':
46         key->push_back('\b');
47         break;
48       case 'f':
49         key->push_back('\f');
50         break;
51       case 'n':
52         key->push_back('\n');
53         break;
54       case 'r':
55         key->push_back('\r');
56         break;
57       case 't':
58         key->push_back('\t');
59         break;
60       case 'u':
61         // Just pass through \uxxxx escape sequences which JSON supports but is
62         // not worth the effort to parse as we never use them here.
63         key->append("\\u");
64         break;
65       default:
66         return util::ErrStatus("Illegal character in JSON");
67     }
68   } else if (c != '\\') {
69     key->push_back(c);
70   }
71   return util::OkStatus();
72 }
73 
// Outcome of reading a single JSON string with ReadOneJsonString().
enum class ReadStringRes {
  // The closing quote of the string was found.
  kEndOfString,
  // The input ended before the closing quote was seen.
  kNeedsMoreData,
  // The string contained a control character or an invalid escape sequence.
  kFatalError,
};
ReadOneJsonString(const char * start,const char * end,std::string * key,const char ** next)79 ReadStringRes ReadOneJsonString(const char* start,
80                                 const char* end,
81                                 std::string* key,
82                                 const char** next) {
83   bool is_escaping = false;
84   for (const char* s = start; s < end; s++) {
85     // Control characters are not allowed in JSON strings.
86     if (iscntrl(*s))
87       return ReadStringRes::kFatalError;
88 
89     // If we get a quote character end of the string.
90     if (*s == '"' && !is_escaping) {
91       *next = s + 1;
92       return ReadStringRes::kEndOfString;
93     }
94 
95     util::Status status = AppendUnescapedCharacter(*s, is_escaping, key);
96     if (!status.ok())
97       return ReadStringRes::kFatalError;
98 
99     // If we're in a string and we see a backslash and the last character was
100     // not a backslash the next character is escaped:
101     is_escaping = *s == '\\' && !is_escaping;
102   }
103   return ReadStringRes::kNeedsMoreData;
104 }
105 
106 }  // namespace
107 
ReadOneJsonDict(const char * start,const char * end,base::StringView * value,const char ** next)108 ReadDictRes ReadOneJsonDict(const char* start,
109                             const char* end,
110                             base::StringView* value,
111                             const char** next) {
112   int braces = 0;
113   int square_brackets = 0;
114   const char* dict_begin = nullptr;
115   bool in_string = false;
116   bool is_escaping = false;
117   for (const char* s = start; s < end; s++) {
118     if (isspace(*s) || *s == ',')
119       continue;
120     if (*s == '"' && !is_escaping) {
121       in_string = !in_string;
122       continue;
123     }
124     if (in_string) {
125       // If we're in a string and we see a backslash and the last character was
126       // not a backslash the next character is escaped:
127       is_escaping = *s == '\\' && !is_escaping;
128       // If we're currently parsing a string we should ignore otherwise special
129       // characters:
130       continue;
131     }
132     if (*s == '{') {
133       if (braces == 0)
134         dict_begin = s;
135       braces++;
136       continue;
137     }
138     if (*s == '}') {
139       if (braces <= 0)
140         return ReadDictRes::kEndOfTrace;
141       if (--braces > 0)
142         continue;
143       size_t len = static_cast<size_t>((s + 1) - dict_begin);
144       *value = base::StringView(dict_begin, len);
145       *next = s + 1;
146       return ReadDictRes::kFoundDict;
147     }
148     if (*s == '[') {
149       square_brackets++;
150       continue;
151     }
152     if (*s == ']') {
153       if (square_brackets == 0) {
154         // We've reached the end of [traceEvents] array.
155         // There might be other top level keys in the json (e.g. metadata)
156         // after.
157         *next = s + 1;
158         return ReadDictRes::kEndOfArray;
159       }
160       square_brackets--;
161     }
162   }
163   return ReadDictRes::kNeedsMoreData;
164 }
165 
ReadOneJsonKey(const char * start,const char * end,std::string * key,const char ** next)166 ReadKeyRes ReadOneJsonKey(const char* start,
167                           const char* end,
168                           std::string* key,
169                           const char** next) {
170   enum class NextToken {
171     kStringOrEndOfDict,
172     kColon,
173     kValue,
174   };
175 
176   NextToken next_token = NextToken::kStringOrEndOfDict;
177   for (const char* s = start; s < end; s++) {
178     // Whitespace characters anywhere can be skipped.
179     if (isspace(*s))
180       continue;
181 
182     switch (next_token) {
183       case NextToken::kStringOrEndOfDict: {
184         // If we see a closing brace, that means we've reached the end of the
185         // wrapping dictionary.
186         if (*s == '}') {
187           *next = s + 1;
188           return ReadKeyRes::kEndOfDictionary;
189         }
190 
191         // If we see a comma separator, just ignore it.
192         if (*s == ',')
193           continue;
194 
195         // If we see anything else but a quote character here, this cannot be a
196         // valid key.
197         if (*s != '"')
198           return ReadKeyRes::kFatalError;
199 
200         auto res = ReadOneJsonString(s + 1, end, key, &s);
201         if (res == ReadStringRes::kFatalError)
202           return ReadKeyRes::kFatalError;
203         if (res == ReadStringRes::kNeedsMoreData)
204           return ReadKeyRes::kNeedsMoreData;
205 
206         // We need to decrement from the pointer as the loop will increment
207         // it back up.
208         s--;
209         next_token = NextToken::kColon;
210         break;
211       }
212       case NextToken::kColon:
213         if (*s != ':')
214           return ReadKeyRes::kFatalError;
215         next_token = NextToken::kValue;
216         break;
217       case NextToken::kValue:
218         // Allowed value starting chars: [ { digit - "
219         // Also allowed: true, false, null. For simplicities sake, we only check
220         // against the first character as we're not trying to be super accurate.
221         if (*s == '[' || *s == '{' || isdigit(*s) || *s == '-' || *s == '"' ||
222             *s == 't' || *s == 'f' || *s == 'n') {
223           *next = s;
224           return ReadKeyRes::kFoundKey;
225         }
226         return ReadKeyRes::kFatalError;
227     }
228   }
229   return ReadKeyRes::kNeedsMoreData;
230 }
231 
ExtractValueForJsonKey(base::StringView dict,const std::string & key,base::Optional<std::string> * value)232 util::Status ExtractValueForJsonKey(base::StringView dict,
233                                     const std::string& key,
234                                     base::Optional<std::string>* value) {
235   PERFETTO_DCHECK(dict.size() >= 2);
236 
237   const char* start = dict.data();
238   const char* end = dict.data() + dict.size();
239 
240   enum ExtractValueState {
241     kBeforeDict,
242     kInsideDict,
243     kAfterDict,
244   };
245 
246   ExtractValueState state = kBeforeDict;
247   for (const char* s = start; s < end;) {
248     if (isspace(*s)) {
249       ++s;
250       continue;
251     }
252 
253     if (state == kBeforeDict) {
254       if (*s == '{') {
255         ++s;
256         state = kInsideDict;
257         continue;
258       }
259       return util::ErrStatus("Unexpected character before JSON dict");
260     }
261 
262     if (state == kAfterDict)
263       return util::ErrStatus("Unexpected character after JSON dict");
264 
265     PERFETTO_DCHECK(state == kInsideDict);
266     PERFETTO_DCHECK(s < end);
267 
268     if (*s == '}') {
269       ++s;
270       state = kAfterDict;
271       continue;
272     }
273 
274     std::string current_key;
275     auto res = ReadOneJsonKey(s, end, &current_key, &s);
276     if (res == ReadKeyRes::kEndOfDictionary)
277       break;
278 
279     if (res == ReadKeyRes::kFatalError)
280       return util::ErrStatus("Failure parsing JSON: encountered fatal error");
281 
282     if (res == ReadKeyRes::kNeedsMoreData) {
283       return util::ErrStatus("Failure parsing JSON: partial JSON dictionary");
284     }
285 
286     PERFETTO_DCHECK(res == ReadKeyRes::kFoundKey);
287 
288     if (*s == '[') {
289       return util::ErrStatus(
290           "Failure parsing JSON: unsupported JSON dictionary with array");
291     }
292 
293     std::string value_str;
294     if (*s == '{') {
295       base::StringView dict_str;
296       ReadDictRes dict_res = ReadOneJsonDict(s, end, &dict_str, &s);
297       if (dict_res == ReadDictRes::kNeedsMoreData ||
298           dict_res == ReadDictRes::kEndOfArray ||
299           dict_res == ReadDictRes::kEndOfTrace) {
300         return util::ErrStatus(
301             "Failure parsing JSON: unable to parse dictionary");
302       }
303       value_str = dict_str.ToStdString();
304     } else if (*s == '"') {
305       auto str_res = ReadOneJsonString(s + 1, end, &value_str, &s);
306       if (str_res == ReadStringRes::kNeedsMoreData ||
307           str_res == ReadStringRes::kFatalError) {
308         return util::ErrStatus("Failure parsing JSON: unable to parse string");
309       }
310     } else {
311       const char* value_start = s;
312       const char* value_end = end;
313       for (; s < end; ++s) {
314         if (*s == ',' || isspace(*s) || *s == '}') {
315           value_end = s;
316           break;
317         }
318       }
319       value_str = std::string(value_start, value_end);
320     }
321 
322     if (key == current_key) {
323       *value = value_str;
324       return util::OkStatus();
325     }
326   }
327 
328   if (state != kAfterDict)
329     return util::ErrStatus("Failure parsing JSON: malformed dictionary");
330 
331   *value = base::nullopt;
332   return util::OkStatus();
333 }
334 
ReadOneSystemTraceLine(const char * start,const char * end,std::string * line,const char ** next)335 ReadSystemLineRes ReadOneSystemTraceLine(const char* start,
336                                          const char* end,
337                                          std::string* line,
338                                          const char** next) {
339   bool is_escaping = false;
340   for (const char* s = start; s < end; s++) {
341     // If we get a quote character and we're not escaping, we are done with the
342     // system trace string.
343     if (*s == '"' && !is_escaping) {
344       *next = s + 1;
345       return ReadSystemLineRes::kEndOfSystemTrace;
346     }
347 
348     // If we are escaping n, that means this is a new line which is a delimiter
349     // for a system trace line.
350     if (*s == 'n' && is_escaping) {
351       *next = s + 1;
352       return ReadSystemLineRes::kFoundLine;
353     }
354 
355     util::Status status = AppendUnescapedCharacter(*s, is_escaping, line);
356     if (!status.ok())
357       return ReadSystemLineRes::kFatalError;
358 
359     // If we're in a string and we see a backslash and the last character was
360     // not a backslash the next character is escaped:
361     is_escaping = *s == '\\' && !is_escaping;
362   }
363   return ReadSystemLineRes::kNeedsMoreData;
364 }
365 
JsonTraceTokenizer(TraceProcessorContext * ctx)366 JsonTraceTokenizer::JsonTraceTokenizer(TraceProcessorContext* ctx)
367     : context_(ctx) {}
368 JsonTraceTokenizer::~JsonTraceTokenizer() = default;
369 
Parse(TraceBlobView blob)370 util::Status JsonTraceTokenizer::Parse(TraceBlobView blob) {
371   PERFETTO_DCHECK(json::IsJsonSupported());
372 
373   buffer_.insert(buffer_.end(), blob.data(), blob.data() + blob.size());
374   const char* buf = buffer_.data();
375   const char* next = buf;
376   const char* end = buf + buffer_.size();
377 
378   if (offset_ == 0) {
379     // Strip leading whitespace.
380     while (next != end && isspace(*next)) {
381       next++;
382     }
383     if (next == end) {
384       return util::ErrStatus(
385           "Failure parsing JSON: first chunk has only whitespace");
386     }
387 
388     // Trace could begin in any of these ways:
389     // {"traceEvents":[{
390     // { "traceEvents": [{
391     // [{
392     if (*next != '{' && *next != '[') {
393       return util::ErrStatus(
394           "Failure parsing JSON: first non-whitespace character is not [ or {");
395     }
396 
397     // Figure out the format of the JSON file based on the first non-whitespace
398     // character.
399     format_ = *next == '{' ? TraceFormat::kOuterDictionary
400                            : TraceFormat::kOnlyTraceEvents;
401 
402     // Skip the '[' or '{' character.
403     next++;
404 
405     // Set our current position based on the format of the trace.
406     position_ = format_ == TraceFormat::kOuterDictionary
407                     ? TracePosition::kDictionaryKey
408                     : TracePosition::kTraceEventsArray;
409   }
410 
411   auto status = ParseInternal(next, end, &next);
412   if (!status.ok())
413     return status;
414 
415   offset_ += static_cast<uint64_t>(next - buf);
416   buffer_.erase(buffer_.begin(), buffer_.begin() + (next - buf));
417   return util::OkStatus();
418 }
419 
ParseInternal(const char * start,const char * end,const char ** out)420 util::Status JsonTraceTokenizer::ParseInternal(const char* start,
421                                                const char* end,
422                                                const char** out) {
423   PERFETTO_DCHECK(json::IsJsonSupported());
424   auto* trace_sorter = context_->sorter.get();
425 
426   const char* next = start;
427   switch (position_) {
428     case TracePosition::kDictionaryKey: {
429       if (format_ != TraceFormat::kOuterDictionary) {
430         return util::ErrStatus(
431             "Failure parsing JSON: illegal format when parsing dictionary key");
432       }
433 
434       std::string key;
435       auto res = ReadOneJsonKey(start, end, &key, &next);
436       if (res == ReadKeyRes::kFatalError)
437         return util::ErrStatus("Failure parsing JSON: encountered fatal error");
438 
439       if (res == ReadKeyRes::kEndOfDictionary ||
440           res == ReadKeyRes::kNeedsMoreData) {
441         break;
442       }
443 
444       if (key == "traceEvents") {
445         position_ = TracePosition::kTraceEventsArray;
446         return ParseInternal(next + 1, end, out);
447       } else if (key == "systemTraceEvents") {
448         position_ = TracePosition::kSystemTraceEventsString;
449         return ParseInternal(next + 1, end, out);
450       } else if (key == "metadata") {
451         position_ = TracePosition::kWaitingForMetadataDictionary;
452         return ParseInternal(next + 1, end, out);
453       } else if (key == "displayTimeUnit") {
454         std::string time_unit;
455         auto result = ReadOneJsonString(next + 1, end, &time_unit, &next);
456         if (result == ReadStringRes::kFatalError)
457           return util::ErrStatus("Could not parse displayTimeUnit");
458         context_->storage->IncrementStats(stats::json_display_time_unit);
459         return ParseInternal(next, end, out);
460       } else if (key == "otherData") {
461         base::StringView unparsed;
462         const auto other = ReadOneJsonDict(next, end, &unparsed, &next);
463         if (other == ReadDictRes::kEndOfArray)
464           return util::ErrStatus(
465               "Failure parsing JSON: Missing ] in otherData");
466         if (other == ReadDictRes::kEndOfTrace)
467           return util::ErrStatus(
468               "Failure parsing JSON: Failed parsing otherData");
469         if (other == ReadDictRes::kNeedsMoreData)
470           return util::ErrStatus("Failure parsing JSON: otherData too large");
471         return ParseInternal(next, end, out);
472       } else {
473         // If we don't recognize the key, just ignore the rest of the trace and
474         // go to EOF.
475         // TODO(lalitm): do something better here.
476         position_ = TracePosition::kEof;
477         break;
478       }
479     }
480     case TracePosition::kSystemTraceEventsString: {
481       if (format_ != TraceFormat::kOuterDictionary) {
482         return util::ErrStatus(
483             "Failure parsing JSON: illegal format when parsing system events");
484       }
485 
486       while (next < end) {
487         std::string raw_line;
488         auto res = ReadOneSystemTraceLine(next, end, &raw_line, &next);
489         if (res == ReadSystemLineRes::kFatalError)
490           return util::ErrStatus(
491               "Failure parsing JSON: encountered fatal error");
492 
493         if (res == ReadSystemLineRes::kNeedsMoreData)
494           break;
495 
496         if (res == ReadSystemLineRes::kEndOfSystemTrace) {
497           position_ = TracePosition::kDictionaryKey;
498           return ParseInternal(next, end, out);
499         }
500 
501         if (base::StartsWith(raw_line, "#") || raw_line.empty())
502           continue;
503 
504         std::unique_ptr<SystraceLine> line(new SystraceLine());
505         util::Status status =
506             systrace_line_tokenizer_.Tokenize(raw_line, line.get());
507         if (!status.ok())
508           return status;
509         trace_sorter->PushSystraceLine(std::move(line));
510       }
511       break;
512     }
513     case TracePosition::kWaitingForMetadataDictionary: {
514       if (format_ != TraceFormat::kOuterDictionary) {
515         return util::ErrStatus(
516             "Failure parsing JSON: illegal format when parsing metadata");
517       }
518 
519       base::StringView unparsed;
520       const auto res = ReadOneJsonDict(next, end, &unparsed, &next);
521       if (res == ReadDictRes::kEndOfArray)
522         return util::ErrStatus("Failure parsing JSON: encountered fatal error");
523       if (res == ReadDictRes::kEndOfTrace ||
524           res == ReadDictRes::kNeedsMoreData) {
525         break;
526       }
527 
528       // TODO(lalitm): read and ingest the relevant data inside |value|.
529       position_ = TracePosition::kDictionaryKey;
530       break;
531     }
532     case TracePosition::kTraceEventsArray: {
533       while (next < end) {
534         base::StringView unparsed;
535         const auto res = ReadOneJsonDict(next, end, &unparsed, &next);
536         if (res == ReadDictRes::kEndOfTrace ||
537             res == ReadDictRes::kNeedsMoreData) {
538           break;
539         }
540 
541         if (res == ReadDictRes::kEndOfArray) {
542           position_ = format_ == TraceFormat::kOuterDictionary
543                           ? TracePosition::kDictionaryKey
544                           : TracePosition::kEof;
545           break;
546         }
547 
548         base::Optional<std::string> opt_raw_ts;
549         RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ts", &opt_raw_ts));
550         base::Optional<int64_t> opt_ts =
551             opt_raw_ts ? json::CoerceToTs(*opt_raw_ts) : base::nullopt;
552         int64_t ts = 0;
553         if (opt_ts.has_value()) {
554           ts = opt_ts.value();
555         } else {
556           // Metadata events may omit ts. In all other cases error:
557           base::Optional<std::string> opt_raw_ph;
558           RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ph", &opt_raw_ph));
559           if (!opt_raw_ph || *opt_raw_ph != "M") {
560             context_->storage->IncrementStats(stats::json_tokenizer_failure);
561             continue;
562           }
563         }
564         trace_sorter->PushJsonValue(ts, unparsed.ToStdString());
565       }
566       break;
567     }
568     case TracePosition::kEof: {
569       break;
570     }
571   }
572   *out = next;
573   return util::OkStatus();
574 }
575 
NotifyEndOfFile()576 void JsonTraceTokenizer::NotifyEndOfFile() {}
577 
578 }  // namespace trace_processor
579 }  // namespace perfetto
580