1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/importers/json/json_trace_tokenizer.h"
18
19 #include <memory>
20
21 #include "perfetto/base/build_config.h"
22 #include "perfetto/ext/base/string_utils.h"
23
24 #include "src/trace_processor/importers/common/trace_blob_view.h"
25 #include "src/trace_processor/importers/json/json_tracker.h"
26 #include "src/trace_processor/importers/json/json_utils.h"
27 #include "src/trace_processor/storage/stats.h"
28 #include "src/trace_processor/trace_sorter.h"
29 #include "src/trace_processor/util/status_macros.h"
30
31 namespace perfetto {
32 namespace trace_processor {
33
34 namespace {
35
36 #if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
37
AppendUnescapedCharacter(char c,bool is_escaping,std::string * key)38 util::Status AppendUnescapedCharacter(char c,
39 bool is_escaping,
40 std::string* key) {
41 if (is_escaping) {
42 switch (c) {
43 case '"':
44 case '\\':
45 case '/':
46 key->push_back(c);
47 break;
48 case 'b':
49 key->push_back('\b');
50 break;
51 case 'f':
52 key->push_back('\f');
53 break;
54 case 'n':
55 key->push_back('\n');
56 break;
57 case 'r':
58 key->push_back('\r');
59 break;
60 case 't':
61 key->push_back('\t');
62 break;
63 default:
64 // We don't support any other escape sequences (concretely \uxxxx
65 // which JSON supports but is too much effort for us to parse).
66 return util::ErrStatus("Illegal character in JSON");
67 }
68 } else if (c != '\\') {
69 key->push_back(c);
70 }
71 return util::OkStatus();
72 }
73
// Outcome of ReadOneJsonString().
enum class ReadStringRes {
  // The closing quote of the string was found.
  kEndOfString,
  // The input ended before the closing quote was seen.
  kNeedsMoreData,
  // The string contains a control character or an unsupported escape.
  kFatalError,
};
ReadOneJsonString(const char * start,const char * end,std::string * key,const char ** next)79 ReadStringRes ReadOneJsonString(const char* start,
80 const char* end,
81 std::string* key,
82 const char** next) {
83 bool is_escaping = false;
84 for (const char* s = start; s < end; s++) {
85 // Control characters are not allowed in JSON strings.
86 if (iscntrl(*s))
87 return ReadStringRes::kFatalError;
88
89 // If we get a quote character end of the string.
90 if (*s == '"' && !is_escaping) {
91 *next = s + 1;
92 return ReadStringRes::kEndOfString;
93 }
94
95 util::Status status = AppendUnescapedCharacter(*s, is_escaping, key);
96 if (!status.ok())
97 return ReadStringRes::kFatalError;
98
99 // If we're in a string and we see a backslash and the last character was
100 // not a backslash the next character is escaped:
101 is_escaping = *s == '\\' && !is_escaping;
102 }
103 return ReadStringRes::kNeedsMoreData;
104 }
105
106 #endif // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
107
108 } // namespace
109
110 #if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
ReadOneJsonDict(const char * start,const char * end,base::StringView * value,const char ** next)111 ReadDictRes ReadOneJsonDict(const char* start,
112 const char* end,
113 base::StringView* value,
114 const char** next) {
115 int braces = 0;
116 int square_brackets = 0;
117 const char* dict_begin = nullptr;
118 bool in_string = false;
119 bool is_escaping = false;
120 for (const char* s = start; s < end; s++) {
121 if (isspace(*s) || *s == ',')
122 continue;
123 if (*s == '"' && !is_escaping) {
124 in_string = !in_string;
125 continue;
126 }
127 if (in_string) {
128 // If we're in a string and we see a backslash and the last character was
129 // not a backslash the next character is escaped:
130 is_escaping = *s == '\\' && !is_escaping;
131 // If we're currently parsing a string we should ignore otherwise special
132 // characters:
133 continue;
134 }
135 if (*s == '{') {
136 if (braces == 0)
137 dict_begin = s;
138 braces++;
139 continue;
140 }
141 if (*s == '}') {
142 if (braces <= 0)
143 return ReadDictRes::kEndOfTrace;
144 if (--braces > 0)
145 continue;
146 size_t len = static_cast<size_t>((s + 1) - dict_begin);
147 *value = base::StringView(dict_begin, len);
148 *next = s + 1;
149 return ReadDictRes::kFoundDict;
150 }
151 if (*s == '[') {
152 square_brackets++;
153 continue;
154 }
155 if (*s == ']') {
156 if (square_brackets == 0) {
157 // We've reached the end of [traceEvents] array.
158 // There might be other top level keys in the json (e.g. metadata)
159 // after.
160 *next = s + 1;
161 return ReadDictRes::kEndOfArray;
162 }
163 square_brackets--;
164 }
165 }
166 return ReadDictRes::kNeedsMoreData;
167 }
168
ReadOneJsonKey(const char * start,const char * end,std::string * key,const char ** next)169 ReadKeyRes ReadOneJsonKey(const char* start,
170 const char* end,
171 std::string* key,
172 const char** next) {
173 enum class NextToken {
174 kStringOrEndOfDict,
175 kColon,
176 kValue,
177 };
178
179 NextToken next_token = NextToken::kStringOrEndOfDict;
180 for (const char* s = start; s < end; s++) {
181 // Whitespace characters anywhere can be skipped.
182 if (isspace(*s))
183 continue;
184
185 switch (next_token) {
186 case NextToken::kStringOrEndOfDict: {
187 // If we see a closing brace, that means we've reached the end of the
188 // wrapping dictionary.
189 if (*s == '}') {
190 *next = s + 1;
191 return ReadKeyRes::kEndOfDictionary;
192 }
193
194 // If we see a comma separator, just ignore it.
195 if (*s == ',')
196 continue;
197
198 // If we see anything else but a quote character here, this cannot be a
199 // valid key.
200 if (*s != '"')
201 return ReadKeyRes::kFatalError;
202
203 auto res = ReadOneJsonString(s + 1, end, key, &s);
204 if (res == ReadStringRes::kFatalError)
205 return ReadKeyRes::kFatalError;
206 if (res == ReadStringRes::kNeedsMoreData)
207 return ReadKeyRes::kNeedsMoreData;
208
209 // We need to decrement from the pointer as the loop will increment
210 // it back up.
211 s--;
212 next_token = NextToken::kColon;
213 break;
214 }
215 case NextToken::kColon:
216 if (*s != ':')
217 return ReadKeyRes::kFatalError;
218 next_token = NextToken::kValue;
219 break;
220 case NextToken::kValue:
221 // Allowed value starting chars: [ { digit - "
222 // Also allowed: true, false, null. For simplicities sake, we only check
223 // against the first character as we're not trying to be super accurate.
224 if (*s == '[' || *s == '{' || isdigit(*s) || *s == '-' || *s == '"' ||
225 *s == 't' || *s == 'f' || *s == 'n') {
226 *next = s;
227 return ReadKeyRes::kFoundKey;
228 }
229 return ReadKeyRes::kFatalError;
230 }
231 }
232 return ReadKeyRes::kNeedsMoreData;
233 }
234
ExtractValueForJsonKey(base::StringView dict,const std::string & key,base::Optional<std::string> * value)235 util::Status ExtractValueForJsonKey(base::StringView dict,
236 const std::string& key,
237 base::Optional<std::string>* value) {
238 PERFETTO_DCHECK(dict.size() >= 2);
239
240 const char* start = dict.data();
241 const char* end = dict.data() + dict.size();
242
243 enum ExtractValueState {
244 kBeforeDict,
245 kInsideDict,
246 kAfterDict,
247 };
248
249 ExtractValueState state = kBeforeDict;
250 for (const char* s = start; s < end;) {
251 if (isspace(*s)) {
252 ++s;
253 continue;
254 }
255
256 if (state == kBeforeDict) {
257 if (*s == '{') {
258 ++s;
259 state = kInsideDict;
260 continue;
261 }
262 return util::ErrStatus("Unexpected character before JSON dict");
263 }
264
265 if (state == kAfterDict)
266 return util::ErrStatus("Unexpected character after JSON dict");
267
268 PERFETTO_DCHECK(state == kInsideDict);
269 PERFETTO_DCHECK(s < end);
270
271 if (*s == '}') {
272 ++s;
273 state = kAfterDict;
274 continue;
275 }
276
277 std::string current_key;
278 auto res = ReadOneJsonKey(s, end, ¤t_key, &s);
279 if (res == ReadKeyRes::kEndOfDictionary)
280 break;
281
282 if (res == ReadKeyRes::kFatalError)
283 return util::ErrStatus("Failure parsing JSON: encountered fatal error");
284
285 if (res == ReadKeyRes::kNeedsMoreData) {
286 return util::ErrStatus("Failure parsing JSON: partial JSON dictionary");
287 }
288
289 PERFETTO_DCHECK(res == ReadKeyRes::kFoundKey);
290
291 if (*s == '[') {
292 return util::ErrStatus(
293 "Failure parsing JSON: unsupported JSON dictionary with array");
294 }
295
296 std::string value_str;
297 if (*s == '{') {
298 base::StringView dict_str;
299 ReadDictRes dict_res = ReadOneJsonDict(s, end, &dict_str, &s);
300 if (dict_res == ReadDictRes::kNeedsMoreData ||
301 dict_res == ReadDictRes::kEndOfArray ||
302 dict_res == ReadDictRes::kEndOfTrace) {
303 return util::ErrStatus(
304 "Failure parsing JSON: unable to parse dictionary");
305 }
306 value_str = dict_str.ToStdString();
307 } else if (*s == '"') {
308 auto str_res = ReadOneJsonString(s + 1, end, &value_str, &s);
309 if (str_res == ReadStringRes::kNeedsMoreData ||
310 str_res == ReadStringRes::kFatalError) {
311 return util::ErrStatus("Failure parsing JSON: unable to parse string");
312 }
313 } else {
314 const char* value_start = s;
315 const char* value_end = end;
316 for (; s < end; ++s) {
317 if (*s == ',' || isspace(*s) || *s == '}') {
318 value_end = s;
319 break;
320 }
321 }
322 value_str = std::string(value_start, value_end);
323 }
324
325 if (key == current_key) {
326 *value = value_str;
327 return util::OkStatus();
328 }
329 }
330
331 if (state != kAfterDict)
332 return util::ErrStatus("Failure parsing JSON: malformed dictionary");
333
334 *value = base::nullopt;
335 return util::OkStatus();
336 }
337
ReadOneSystemTraceLine(const char * start,const char * end,std::string * line,const char ** next)338 ReadSystemLineRes ReadOneSystemTraceLine(const char* start,
339 const char* end,
340 std::string* line,
341 const char** next) {
342 bool is_escaping = false;
343 for (const char* s = start; s < end; s++) {
344 // If we get a quote character and we're not escaping, we are done with the
345 // system trace string.
346 if (*s == '"' && !is_escaping) {
347 *next = s + 1;
348 return ReadSystemLineRes::kEndOfSystemTrace;
349 }
350
351 // If we are escaping n, that means this is a new line which is a delimiter
352 // for a system trace line.
353 if (*s == 'n' && is_escaping) {
354 *next = s + 1;
355 return ReadSystemLineRes::kFoundLine;
356 }
357
358 util::Status status = AppendUnescapedCharacter(*s, is_escaping, line);
359 if (!status.ok())
360 return ReadSystemLineRes::kFatalError;
361
362 // If we're in a string and we see a backslash and the last character was
363 // not a backslash the next character is escaped:
364 is_escaping = *s == '\\' && !is_escaping;
365 }
366 return ReadSystemLineRes::kNeedsMoreData;
367 }
368 #endif // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
369
JsonTraceTokenizer(TraceProcessorContext * ctx)370 JsonTraceTokenizer::JsonTraceTokenizer(TraceProcessorContext* ctx)
371 : context_(ctx) {}
372 JsonTraceTokenizer::~JsonTraceTokenizer() = default;
373
Parse(std::unique_ptr<uint8_t[]> data,size_t size)374 util::Status JsonTraceTokenizer::Parse(std::unique_ptr<uint8_t[]> data,
375 size_t size) {
376 PERFETTO_DCHECK(json::IsJsonSupported());
377
378 #if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
379 buffer_.insert(buffer_.end(), data.get(), data.get() + size);
380 const char* buf = buffer_.data();
381 const char* next = buf;
382 const char* end = buf + buffer_.size();
383
384 JsonTracker* json_tracker = JsonTracker::GetOrCreate(context_);
385
386 // It's possible the displayTimeUnit key is at the end of the json
387 // file so to be correct we ought to parse the whole file looking
388 // for this key before parsing any events however this would require
389 // two passes on the file so for now we only handle displayTimeUnit
390 // correctly if it is at the beginning of the file.
391 const base::StringView view(buf, size);
392 if (view.find("\"displayTimeUnit\":\"ns\"") != base::StringView::npos) {
393 json_tracker->SetTimeUnit(json::TimeUnit::kNs);
394 } else if (view.find("\"displayTimeUnit\":\"ms\"") !=
395 base::StringView::npos) {
396 json_tracker->SetTimeUnit(json::TimeUnit::kMs);
397 }
398
399 if (offset_ == 0) {
400 // Strip leading whitespace.
401 while (next != end && isspace(*next)) {
402 next++;
403 }
404 if (next == end) {
405 return util::ErrStatus(
406 "Failure parsing JSON: first chunk has only whitespace");
407 }
408
409 // Trace could begin in any of these ways:
410 // {"traceEvents":[{
411 // { "traceEvents": [{
412 // [{
413 if (*next != '{' && *next != '[') {
414 return util::ErrStatus(
415 "Failure parsing JSON: first non-whitespace character is not [ or {");
416 }
417
418 // Figure out the format of the JSON file based on the first non-whitespace
419 // character.
420 format_ = *next == '{' ? TraceFormat::kOuterDictionary
421 : TraceFormat::kOnlyTraceEvents;
422
423 // Skip the '[' or '{' character.
424 next++;
425
426 // Set our current position based on the format of the trace.
427 position_ = format_ == TraceFormat::kOuterDictionary
428 ? TracePosition::kDictionaryKey
429 : TracePosition::kTraceEventsArray;
430 }
431
432 auto status = ParseInternal(next, end, &next);
433 if (!status.ok())
434 return status;
435
436 offset_ += static_cast<uint64_t>(next - buf);
437 buffer_.erase(buffer_.begin(), buffer_.begin() + (next - buf));
438 return util::OkStatus();
439 #else
440 perfetto::base::ignore_result(data);
441 perfetto::base::ignore_result(size);
442 perfetto::base::ignore_result(context_);
443 perfetto::base::ignore_result(format_);
444 perfetto::base::ignore_result(position_);
445 perfetto::base::ignore_result(offset_);
446 return util::ErrStatus("Cannot parse JSON trace due to missing JSON support");
447 #endif // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
448 }
449
450 #if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
ParseInternal(const char * start,const char * end,const char ** out)451 util::Status JsonTraceTokenizer::ParseInternal(const char* start,
452 const char* end,
453 const char** out) {
454 PERFETTO_DCHECK(json::IsJsonSupported());
455 JsonTracker* json_tracker = JsonTracker::GetOrCreate(context_);
456 auto* trace_sorter = context_->sorter.get();
457
458 const char* next = start;
459 switch (position_) {
460 case TracePosition::kDictionaryKey: {
461 if (format_ != TraceFormat::kOuterDictionary) {
462 return util::ErrStatus(
463 "Failure parsing JSON: illegal format when parsing dictionary key");
464 }
465
466 std::string key;
467 auto res = ReadOneJsonKey(start, end, &key, &next);
468 if (res == ReadKeyRes::kFatalError)
469 return util::ErrStatus("Failure parsing JSON: encountered fatal error");
470
471 if (res == ReadKeyRes::kEndOfDictionary ||
472 res == ReadKeyRes::kNeedsMoreData) {
473 break;
474 }
475
476 if (key == "traceEvents") {
477 position_ = TracePosition::kTraceEventsArray;
478 return ParseInternal(next + 1, end, out);
479 } else if (key == "systemTraceEvents") {
480 position_ = TracePosition::kSystemTraceEventsString;
481 return ParseInternal(next + 1, end, out);
482 } else if (key == "metadata") {
483 position_ = TracePosition::kWaitingForMetadataDictionary;
484 return ParseInternal(next + 1, end, out);
485 } else if (key == "displayTimeUnit") {
486 std::string time_unit;
487 auto string_res = ReadOneJsonString(next + 1, end, &time_unit, &next);
488 if (string_res == ReadStringRes::kFatalError)
489 return util::ErrStatus("Could not parse displayTimeUnit");
490 if (string_res == ReadStringRes::kNeedsMoreData)
491 return util::ErrStatus("displayTimeUnit too large");
492 if (time_unit != "ms" && time_unit != "ns")
493 return util::ErrStatus("displayTimeUnit unknown");
494 return ParseInternal(next, end, out);
495 } else {
496 // If we don't recognize the key, just ignore the rest of the trace and
497 // go to EOF.
498 // TODO(lalitm): do something better here.
499 position_ = TracePosition::kEof;
500 break;
501 }
502 }
503 case TracePosition::kSystemTraceEventsString: {
504 if (format_ != TraceFormat::kOuterDictionary) {
505 return util::ErrStatus(
506 "Failure parsing JSON: illegal format when parsing system events");
507 }
508
509 while (next < end) {
510 std::string raw_line;
511 auto res = ReadOneSystemTraceLine(next, end, &raw_line, &next);
512 if (res == ReadSystemLineRes::kFatalError)
513 return util::ErrStatus(
514 "Failure parsing JSON: encountered fatal error");
515
516 if (res == ReadSystemLineRes::kNeedsMoreData)
517 break;
518
519 if (res == ReadSystemLineRes::kEndOfSystemTrace) {
520 position_ = TracePosition::kDictionaryKey;
521 return ParseInternal(next, end, out);
522 }
523
524 if (base::StartsWith(raw_line, "#") || raw_line.empty())
525 continue;
526
527 std::unique_ptr<SystraceLine> line(new SystraceLine());
528 util::Status status =
529 systrace_line_tokenizer_.Tokenize(raw_line, line.get());
530 if (!status.ok())
531 return status;
532 trace_sorter->PushSystraceLine(std::move(line));
533 }
534 break;
535 }
536 case TracePosition::kWaitingForMetadataDictionary: {
537 if (format_ != TraceFormat::kOuterDictionary) {
538 return util::ErrStatus(
539 "Failure parsing JSON: illegal format when parsing metadata");
540 }
541
542 base::StringView unparsed;
543 const auto res = ReadOneJsonDict(next, end, &unparsed, &next);
544 if (res == ReadDictRes::kEndOfArray)
545 return util::ErrStatus("Failure parsing JSON: encountered fatal error");
546 if (res == ReadDictRes::kEndOfTrace ||
547 res == ReadDictRes::kNeedsMoreData) {
548 break;
549 }
550
551 // TODO(lalitm): read and ingest the relevant data inside |value|.
552 position_ = TracePosition::kDictionaryKey;
553 break;
554 }
555 case TracePosition::kTraceEventsArray: {
556 while (next < end) {
557 base::StringView unparsed;
558 const auto res = ReadOneJsonDict(next, end, &unparsed, &next);
559 if (res == ReadDictRes::kEndOfTrace ||
560 res == ReadDictRes::kNeedsMoreData) {
561 break;
562 }
563
564 if (res == ReadDictRes::kEndOfArray) {
565 position_ = format_ == TraceFormat::kOuterDictionary
566 ? TracePosition::kDictionaryKey
567 : TracePosition::kEof;
568 break;
569 }
570
571 base::Optional<std::string> opt_raw_ts;
572 RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ts", &opt_raw_ts));
573 base::Optional<int64_t> opt_ts =
574 opt_raw_ts ? json_tracker->CoerceToTs(*opt_raw_ts) : base::nullopt;
575 int64_t ts = 0;
576 if (opt_ts.has_value()) {
577 ts = opt_ts.value();
578 } else {
579 // Metadata events may omit ts. In all other cases error:
580 base::Optional<std::string> opt_raw_ph;
581 RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ph", &opt_raw_ph));
582 if (!opt_raw_ph || *opt_raw_ph != "M") {
583 context_->storage->IncrementStats(stats::json_tokenizer_failure);
584 continue;
585 }
586 }
587 trace_sorter->PushJsonValue(ts, unparsed.ToStdString());
588 }
589 break;
590 }
591 case TracePosition::kEof: {
592 break;
593 }
594 }
595 *out = next;
596 return util::OkStatus();
597 }
598 #endif // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
599
NotifyEndOfFile()600 void JsonTraceTokenizer::NotifyEndOfFile() {}
601
602 } // namespace trace_processor
603 } // namespace perfetto
604