1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/importers/json/json_trace_tokenizer.h"
18
19 #include <memory>
20
21 #include "perfetto/base/build_config.h"
22 #include "perfetto/ext/base/string_utils.h"
23
24 #include "perfetto/trace_processor/trace_blob_view.h"
25 #include "src/trace_processor/importers/json/json_utils.h"
26 #include "src/trace_processor/sorter/trace_sorter.h"
27 #include "src/trace_processor/storage/stats.h"
28 #include "src/trace_processor/util/status_macros.h"
29
30 namespace perfetto {
31 namespace trace_processor {
32
33 namespace {
34
AppendUnescapedCharacter(char c,bool is_escaping,std::string * key)35 base::Status AppendUnescapedCharacter(char c,
36 bool is_escaping,
37 std::string* key) {
38 if (is_escaping) {
39 switch (c) {
40 case '"':
41 case '\\':
42 case '/':
43 key->push_back(c);
44 break;
45 case 'b':
46 key->push_back('\b');
47 break;
48 case 'f':
49 key->push_back('\f');
50 break;
51 case 'n':
52 key->push_back('\n');
53 break;
54 case 'r':
55 key->push_back('\r');
56 break;
57 case 't':
58 key->push_back('\t');
59 break;
60 case 'u':
61 // Just pass through \uxxxx escape sequences which JSON supports but is
62 // not worth the effort to parse as we never use them here.
63 key->append("\\u");
64 break;
65 default:
66 return base::ErrStatus("Illegal character in JSON");
67 }
68 } else if (c != '\\') {
69 key->push_back(c);
70 }
71 return base::OkStatus();
72 }
73
// Outcome of a ReadOneJsonString() call.
enum class ReadStringRes {
  // The closing quote was found; the whole string was consumed.
  kEndOfString,
  // The input ended before the closing quote was seen.
  kNeedsMoreData,
  // The input is not a well-formed JSON string.
  kFatalError,
};
ReadOneJsonString(const char * start,const char * end,std::string * key,const char ** next)79 ReadStringRes ReadOneJsonString(const char* start,
80 const char* end,
81 std::string* key,
82 const char** next) {
83 if (start == end) {
84 return ReadStringRes::kNeedsMoreData;
85 }
86 if (*start != '"') {
87 return ReadStringRes::kFatalError;
88 }
89
90 bool is_escaping = false;
91 for (const char* s = start + 1; s < end; s++) {
92 // Control characters are not allowed in JSON strings.
93 if (iscntrl(*s))
94 return ReadStringRes::kFatalError;
95
96 // If we get a quote character end of the string.
97 if (*s == '"' && !is_escaping) {
98 *next = s + 1;
99 return ReadStringRes::kEndOfString;
100 }
101
102 base::Status status = AppendUnescapedCharacter(*s, is_escaping, key);
103 if (!status.ok())
104 return ReadStringRes::kFatalError;
105
106 // If we're in a string and we see a backslash and the last character was
107 // not a backslash the next character is escaped:
108 is_escaping = *s == '\\' && !is_escaping;
109 }
110 return ReadStringRes::kNeedsMoreData;
111 }
112
// Outcome of a SkipOneJsonValue() call.
enum class SkipValueRes {
  // A ',' or '}' terminating the value was found outside any nested container.
  kEndOfValue,
  // The input ended before the end of the value was seen.
  kNeedsMoreData,
  // The value is malformed (bad string or unbalanced closing bracket/brace).
  kFatalError,
};
SkipOneJsonValue(const char * start,const char * end,const char ** next)118 SkipValueRes SkipOneJsonValue(const char* start,
119 const char* end,
120 const char** next) {
121 uint32_t brace_count = 0;
122 uint32_t bracket_count = 0;
123 for (const char* s = start; s < end; s++) {
124 if (*s == '"') {
125 // Because strings can contain {}[] characters, handle them separately
126 // before anything else.
127 std::string ignored;
128 const char* str_next = nullptr;
129 switch (ReadOneJsonString(s, end, &ignored, &str_next)) {
130 case ReadStringRes::kFatalError:
131 return SkipValueRes::kFatalError;
132 case ReadStringRes::kNeedsMoreData:
133 return SkipValueRes::kNeedsMoreData;
134 case ReadStringRes::kEndOfString:
135 // -1 as the loop body will +1 getting to the correct place.
136 s = str_next - 1;
137 break;
138 }
139 continue;
140 }
141 if (brace_count == 0 && bracket_count == 0 && (*s == ',' || *s == '}')) {
142 // Regardless of a comma or brace, this will be skipped by the caller so
143 // just set it to this character.
144 *next = s;
145 return SkipValueRes::kEndOfValue;
146 }
147 if (*s == '[') {
148 ++bracket_count;
149 continue;
150 }
151 if (*s == ']') {
152 if (bracket_count == 0) {
153 return SkipValueRes::kFatalError;
154 }
155 --bracket_count;
156 continue;
157 }
158 if (*s == '{') {
159 ++brace_count;
160 continue;
161 }
162 if (*s == '}') {
163 if (brace_count == 0) {
164 return SkipValueRes::kFatalError;
165 }
166 --brace_count;
167 continue;
168 }
169 }
170 return SkipValueRes::kNeedsMoreData;
171 }
172
SetOutAndReturn(const char * ptr,const char ** out)173 base::Status SetOutAndReturn(const char* ptr, const char** out) {
174 *out = ptr;
175 return base::OkStatus();
176 }
177
178 } // namespace
179
ReadOneJsonDict(const char * start,const char * end,base::StringView * value,const char ** next)180 ReadDictRes ReadOneJsonDict(const char* start,
181 const char* end,
182 base::StringView* value,
183 const char** next) {
184 int braces = 0;
185 int square_brackets = 0;
186 const char* dict_begin = nullptr;
187 bool in_string = false;
188 bool is_escaping = false;
189 for (const char* s = start; s < end; s++) {
190 if (isspace(*s) || *s == ',')
191 continue;
192 if (*s == '"' && !is_escaping) {
193 in_string = !in_string;
194 continue;
195 }
196 if (in_string) {
197 // If we're in a string and we see a backslash and the last character was
198 // not a backslash the next character is escaped:
199 is_escaping = *s == '\\' && !is_escaping;
200 // If we're currently parsing a string we should ignore otherwise special
201 // characters:
202 continue;
203 }
204 if (*s == '{') {
205 if (braces == 0)
206 dict_begin = s;
207 braces++;
208 continue;
209 }
210 if (*s == '}') {
211 if (braces <= 0)
212 return ReadDictRes::kEndOfTrace;
213 if (--braces > 0)
214 continue;
215 size_t len = static_cast<size_t>((s + 1) - dict_begin);
216 *value = base::StringView(dict_begin, len);
217 *next = s + 1;
218 return ReadDictRes::kFoundDict;
219 }
220 if (*s == '[') {
221 square_brackets++;
222 continue;
223 }
224 if (*s == ']') {
225 if (square_brackets == 0) {
226 // We've reached the end of [traceEvents] array.
227 // There might be other top level keys in the json (e.g. metadata)
228 // after.
229 *next = s + 1;
230 return ReadDictRes::kEndOfArray;
231 }
232 square_brackets--;
233 }
234 }
235 return ReadDictRes::kNeedsMoreData;
236 }
237
ReadOneJsonKey(const char * start,const char * end,std::string * key,const char ** next)238 ReadKeyRes ReadOneJsonKey(const char* start,
239 const char* end,
240 std::string* key,
241 const char** next) {
242 enum class NextToken {
243 kStringOrEndOfDict,
244 kColon,
245 kValue,
246 };
247
248 NextToken next_token = NextToken::kStringOrEndOfDict;
249 for (const char* s = start; s < end; s++) {
250 // Whitespace characters anywhere can be skipped.
251 if (isspace(*s))
252 continue;
253
254 switch (next_token) {
255 case NextToken::kStringOrEndOfDict: {
256 // If we see a closing brace, that means we've reached the end of the
257 // wrapping dictionary.
258 if (*s == '}') {
259 *next = s + 1;
260 return ReadKeyRes::kEndOfDictionary;
261 }
262
263 // If we see a comma separator, just ignore it.
264 if (*s == ',')
265 continue;
266
267 auto res = ReadOneJsonString(s, end, key, &s);
268 if (res == ReadStringRes::kFatalError)
269 return ReadKeyRes::kFatalError;
270 if (res == ReadStringRes::kNeedsMoreData)
271 return ReadKeyRes::kNeedsMoreData;
272
273 // We need to decrement from the pointer as the loop will increment
274 // it back up.
275 s--;
276 next_token = NextToken::kColon;
277 break;
278 }
279 case NextToken::kColon:
280 if (*s != ':')
281 return ReadKeyRes::kFatalError;
282 next_token = NextToken::kValue;
283 break;
284 case NextToken::kValue:
285 // Allowed value starting chars: [ { digit - "
286 // Also allowed: true, false, null. For simplicities sake, we only check
287 // against the first character as we're not trying to be super accurate.
288 if (*s == '[' || *s == '{' || isdigit(*s) || *s == '-' || *s == '"' ||
289 *s == 't' || *s == 'f' || *s == 'n') {
290 *next = s;
291 return ReadKeyRes::kFoundKey;
292 }
293 return ReadKeyRes::kFatalError;
294 }
295 }
296 return ReadKeyRes::kNeedsMoreData;
297 }
298
ExtractValueForJsonKey(base::StringView dict,const std::string & key,std::optional<std::string> * value)299 base::Status ExtractValueForJsonKey(base::StringView dict,
300 const std::string& key,
301 std::optional<std::string>* value) {
302 PERFETTO_DCHECK(dict.size() >= 2);
303
304 const char* start = dict.data();
305 const char* end = dict.data() + dict.size();
306
307 enum ExtractValueState {
308 kBeforeDict,
309 kInsideDict,
310 kAfterDict,
311 };
312
313 ExtractValueState state = kBeforeDict;
314 for (const char* s = start; s < end;) {
315 if (isspace(*s)) {
316 ++s;
317 continue;
318 }
319
320 if (state == kBeforeDict) {
321 if (*s == '{') {
322 ++s;
323 state = kInsideDict;
324 continue;
325 }
326 return base::ErrStatus("Unexpected character before JSON dict");
327 }
328
329 if (state == kAfterDict)
330 return base::ErrStatus("Unexpected character after JSON dict");
331
332 PERFETTO_DCHECK(state == kInsideDict);
333 PERFETTO_DCHECK(s < end);
334
335 if (*s == '}') {
336 ++s;
337 state = kAfterDict;
338 continue;
339 }
340
341 std::string current_key;
342 auto res = ReadOneJsonKey(s, end, ¤t_key, &s);
343 if (res == ReadKeyRes::kEndOfDictionary)
344 break;
345
346 if (res == ReadKeyRes::kFatalError) {
347 return base::ErrStatus(
348 "Failure parsing JSON: encountered fatal error while parsing key for "
349 "value");
350 }
351
352 if (res == ReadKeyRes::kNeedsMoreData) {
353 return base::ErrStatus("Failure parsing JSON: partial JSON dictionary");
354 }
355
356 PERFETTO_DCHECK(res == ReadKeyRes::kFoundKey);
357
358 if (*s == '[') {
359 return base::ErrStatus(
360 "Failure parsing JSON: unsupported JSON dictionary with array");
361 }
362
363 std::string value_str;
364 if (*s == '{') {
365 base::StringView dict_str;
366 ReadDictRes dict_res = ReadOneJsonDict(s, end, &dict_str, &s);
367 if (dict_res == ReadDictRes::kNeedsMoreData ||
368 dict_res == ReadDictRes::kEndOfArray ||
369 dict_res == ReadDictRes::kEndOfTrace) {
370 return base::ErrStatus(
371 "Failure parsing JSON: unable to parse dictionary");
372 }
373 value_str = dict_str.ToStdString();
374 } else if (*s == '"') {
375 auto str_res = ReadOneJsonString(s, end, &value_str, &s);
376 if (str_res == ReadStringRes::kNeedsMoreData ||
377 str_res == ReadStringRes::kFatalError) {
378 return base::ErrStatus("Failure parsing JSON: unable to parse string");
379 }
380 } else {
381 const char* value_start = s;
382 const char* value_end = end;
383 for (; s < end; ++s) {
384 if (*s == ',' || isspace(*s) || *s == '}') {
385 value_end = s;
386 break;
387 }
388 }
389 value_str = std::string(value_start, value_end);
390 }
391
392 if (key == current_key) {
393 *value = value_str;
394 return base::OkStatus();
395 }
396 }
397
398 if (state != kAfterDict)
399 return base::ErrStatus("Failure parsing JSON: malformed dictionary");
400
401 *value = std::nullopt;
402 return base::OkStatus();
403 }
404
ReadOneSystemTraceLine(const char * start,const char * end,std::string * line,const char ** next)405 ReadSystemLineRes ReadOneSystemTraceLine(const char* start,
406 const char* end,
407 std::string* line,
408 const char** next) {
409 bool is_escaping = false;
410 for (const char* s = start; s < end; s++) {
411 // If we get a quote character and we're not escaping, we are done with the
412 // system trace string.
413 if (*s == '"' && !is_escaping) {
414 *next = s + 1;
415 return ReadSystemLineRes::kEndOfSystemTrace;
416 }
417
418 // If we are escaping n, that means this is a new line which is a delimiter
419 // for a system trace line.
420 if (*s == 'n' && is_escaping) {
421 *next = s + 1;
422 return ReadSystemLineRes::kFoundLine;
423 }
424
425 base::Status status = AppendUnescapedCharacter(*s, is_escaping, line);
426 if (!status.ok())
427 return ReadSystemLineRes::kFatalError;
428
429 // If we're in a string and we see a backslash and the last character was
430 // not a backslash the next character is escaped:
431 is_escaping = *s == '\\' && !is_escaping;
432 }
433 return ReadSystemLineRes::kNeedsMoreData;
434 }
435
JsonTraceTokenizer(TraceProcessorContext * ctx)436 JsonTraceTokenizer::JsonTraceTokenizer(TraceProcessorContext* ctx)
437 : context_(ctx) {}
438 JsonTraceTokenizer::~JsonTraceTokenizer() = default;
439
Parse(TraceBlobView blob)440 base::Status JsonTraceTokenizer::Parse(TraceBlobView blob) {
441 PERFETTO_DCHECK(json::IsJsonSupported());
442
443 buffer_.insert(buffer_.end(), blob.data(), blob.data() + blob.size());
444 const char* buf = buffer_.data();
445 const char* next = buf;
446 const char* end = buf + buffer_.size();
447
448 if (offset_ == 0) {
449 // Strip leading whitespace.
450 while (next != end && isspace(*next)) {
451 next++;
452 }
453 if (next == end) {
454 return base::ErrStatus(
455 "Failure parsing JSON: first chunk has only whitespace");
456 }
457
458 // Trace could begin in any of these ways:
459 // {"traceEvents":[{
460 // { "traceEvents": [{
461 // [{
462 if (*next != '{' && *next != '[') {
463 return base::ErrStatus(
464 "Failure parsing JSON: first non-whitespace character is not [ or {");
465 }
466
467 // Figure out the format of the JSON file based on the first non-whitespace
468 // character.
469 format_ = *next == '{' ? TraceFormat::kOuterDictionary
470 : TraceFormat::kOnlyTraceEvents;
471
472 // Skip the '[' or '{' character.
473 next++;
474
475 // Set our current position based on the format of the trace.
476 position_ = format_ == TraceFormat::kOuterDictionary
477 ? TracePosition::kDictionaryKey
478 : TracePosition::kInsideTraceEventsArray;
479 }
480 RETURN_IF_ERROR(ParseInternal(next, end, &next));
481
482 offset_ += static_cast<uint64_t>(next - buf);
483 buffer_.erase(buffer_.begin(), buffer_.begin() + (next - buf));
484 return base::OkStatus();
485 }
486
ParseInternal(const char * start,const char * end,const char ** out)487 base::Status JsonTraceTokenizer::ParseInternal(const char* start,
488 const char* end,
489 const char** out) {
490 PERFETTO_DCHECK(json::IsJsonSupported());
491
492 switch (position_) {
493 case TracePosition::kDictionaryKey:
494 return HandleDictionaryKey(start, end, out);
495 case TracePosition::kInsideSystemTraceEventsString:
496 return HandleSystemTraceEvent(start, end, out);
497 case TracePosition::kInsideTraceEventsArray:
498 return HandleTraceEvent(start, end, out);
499 case TracePosition::kEof: {
500 return start == end
501 ? base::OkStatus()
502 : base::ErrStatus(
503 "Failure parsing JSON: tried to parse data after EOF");
504 }
505 }
506 PERFETTO_FATAL("For GCC");
507 }
508
HandleTraceEvent(const char * start,const char * end,const char ** out)509 base::Status JsonTraceTokenizer::HandleTraceEvent(const char* start,
510 const char* end,
511 const char** out) {
512 const char* next = start;
513 while (next < end) {
514 base::StringView unparsed;
515 switch (ReadOneJsonDict(next, end, &unparsed, &next)) {
516 case ReadDictRes::kEndOfArray: {
517 if (format_ == TraceFormat::kOnlyTraceEvents) {
518 position_ = TracePosition::kEof;
519 return SetOutAndReturn(next, out);
520 }
521
522 position_ = TracePosition::kDictionaryKey;
523 return ParseInternal(next, end, out);
524 }
525 case ReadDictRes::kEndOfTrace:
526 position_ = TracePosition::kEof;
527 return SetOutAndReturn(next, out);
528 case ReadDictRes::kNeedsMoreData:
529 return SetOutAndReturn(next, out);
530 case ReadDictRes::kFoundDict:
531 break;
532 }
533
534 std::optional<std::string> opt_raw_ts;
535 RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ts", &opt_raw_ts));
536 std::optional<int64_t> opt_ts =
537 opt_raw_ts ? json::CoerceToTs(*opt_raw_ts) : std::nullopt;
538 int64_t ts = 0;
539 if (opt_ts.has_value()) {
540 ts = opt_ts.value();
541 } else {
542 // Metadata events may omit ts. In all other cases error:
543 std::optional<std::string> opt_raw_ph;
544 RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ph", &opt_raw_ph));
545 if (!opt_raw_ph || *opt_raw_ph != "M") {
546 context_->storage->IncrementStats(stats::json_tokenizer_failure);
547 continue;
548 }
549 }
550 context_->sorter->PushJsonValue(ts, unparsed.ToStdString());
551 }
552 return SetOutAndReturn(next, out);
553 }
554
HandleDictionaryKey(const char * start,const char * end,const char ** out)555 base::Status JsonTraceTokenizer::HandleDictionaryKey(const char* start,
556 const char* end,
557 const char** out) {
558 if (format_ != TraceFormat::kOuterDictionary) {
559 return base::ErrStatus(
560 "Failure parsing JSON: illegal format when parsing dictionary key");
561 }
562
563 const char* next = start;
564 std::string key;
565 switch (ReadOneJsonKey(start, end, &key, &next)) {
566 case ReadKeyRes::kFatalError:
567 return base::ErrStatus(
568 "Failure parsing JSON: encountered fatal error while parsing key");
569 case ReadKeyRes::kEndOfDictionary:
570 position_ = TracePosition::kEof;
571 return SetOutAndReturn(next, out);
572 case ReadKeyRes::kNeedsMoreData:
573 // If we didn't manage to read the key we need to set |out| to |start|
574 // (*not* |next|) to keep the state machine happy.
575 return SetOutAndReturn(start, out);
576 case ReadKeyRes::kFoundKey:
577 break;
578 }
579
580 // ReadOneJsonKey should ensure that the first character of the value is
581 // available.
582 PERFETTO_CHECK(next < end);
583
584 if (key == "traceEvents") {
585 // Skip the [ character opening the array.
586 if (*next != '[') {
587 return base::ErrStatus(
588 "Failure parsing JSON: traceEvents is not an array.");
589 }
590 next++;
591
592 position_ = TracePosition::kInsideTraceEventsArray;
593 return ParseInternal(next, end, out);
594 }
595
596 if (key == "systemTraceEvents") {
597 // Skip the " character opening the string.
598 if (*next != '"') {
599 return base::ErrStatus(
600 "Failure parsing JSON: systemTraceEvents is not an string.");
601 }
602 next++;
603
604 position_ = TracePosition::kInsideSystemTraceEventsString;
605 return ParseInternal(next, end, out);
606 }
607
608 if (key == "displayTimeUnit") {
609 std::string time_unit;
610 auto result = ReadOneJsonString(next, end, &time_unit, &next);
611 if (result == ReadStringRes::kFatalError)
612 return base::ErrStatus("Could not parse displayTimeUnit");
613 context_->storage->IncrementStats(stats::json_display_time_unit);
614 return ParseInternal(next, end, out);
615 }
616
617 // If we don't know the key for this JSON value just skip it.
618 switch (SkipOneJsonValue(next, end, &next)) {
619 case SkipValueRes::kFatalError:
620 return base::ErrStatus(
621 "Failure parsing JSON: error while parsing value for key %s",
622 key.c_str());
623 case SkipValueRes::kNeedsMoreData:
624 // If we didn't manage to read the key *and* the value, we need to set
625 // |out| to |start| (*not* |next|) to keep the state machine happy (as
626 // we expect to always see a key before the value).
627 return SetOutAndReturn(start, out);
628 case SkipValueRes::kEndOfValue:
629 return ParseInternal(next, end, out);
630 }
631 PERFETTO_FATAL("For GCC");
632 }
633
HandleSystemTraceEvent(const char * start,const char * end,const char ** out)634 base::Status JsonTraceTokenizer::HandleSystemTraceEvent(const char* start,
635 const char* end,
636 const char** out) {
637 if (format_ != TraceFormat::kOuterDictionary) {
638 return base::ErrStatus(
639 "Failure parsing JSON: illegal format when parsing system events");
640 }
641
642 const char* next = start;
643 while (next < end) {
644 std::string raw_line;
645 switch (ReadOneSystemTraceLine(next, end, &raw_line, &next)) {
646 case ReadSystemLineRes::kFatalError:
647 return base::ErrStatus(
648 "Failure parsing JSON: encountered fatal error while parsing "
649 "event inside trace event string");
650 case ReadSystemLineRes::kNeedsMoreData:
651 return SetOutAndReturn(next, out);
652 case ReadSystemLineRes::kEndOfSystemTrace:
653 position_ = TracePosition::kDictionaryKey;
654 return ParseInternal(next, end, out);
655 case ReadSystemLineRes::kFoundLine:
656 break;
657 }
658
659 if (base::StartsWith(raw_line, "#") || raw_line.empty())
660 continue;
661
662 SystraceLine line;
663 RETURN_IF_ERROR(systrace_line_tokenizer_.Tokenize(raw_line, &line));
664 context_->sorter->PushSystraceLine(std::move(line));
665 }
666 return SetOutAndReturn(next, out);
667 }
668
NotifyEndOfFile()669 void JsonTraceTokenizer::NotifyEndOfFile() {
670 PERFETTO_DCHECK(position_ == TracePosition::kEof);
671 }
672
673 } // namespace trace_processor
674 } // namespace perfetto
675