1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/importers/json/json_trace_tokenizer.h"
18
19 #include <memory>
20
21 #include "perfetto/base/build_config.h"
22 #include "perfetto/ext/base/string_utils.h"
23
24 #include "perfetto/trace_processor/trace_blob_view.h"
25 #include "src/trace_processor/importers/json/json_utils.h"
26 #include "src/trace_processor/storage/stats.h"
27 #include "src/trace_processor/trace_sorter.h"
28 #include "src/trace_processor/util/status_macros.h"
29
30 namespace perfetto {
31 namespace trace_processor {
32
33 namespace {
34
AppendUnescapedCharacter(char c,bool is_escaping,std::string * key)35 util::Status AppendUnescapedCharacter(char c,
36 bool is_escaping,
37 std::string* key) {
38 if (is_escaping) {
39 switch (c) {
40 case '"':
41 case '\\':
42 case '/':
43 key->push_back(c);
44 break;
45 case 'b':
46 key->push_back('\b');
47 break;
48 case 'f':
49 key->push_back('\f');
50 break;
51 case 'n':
52 key->push_back('\n');
53 break;
54 case 'r':
55 key->push_back('\r');
56 break;
57 case 't':
58 key->push_back('\t');
59 break;
60 case 'u':
61 // Just pass through \uxxxx escape sequences which JSON supports but is
62 // not worth the effort to parse as we never use them here.
63 key->append("\\u");
64 break;
65 default:
66 return util::ErrStatus("Illegal character in JSON");
67 }
68 } else if (c != '\\') {
69 key->push_back(c);
70 }
71 return util::OkStatus();
72 }
73
74 enum class ReadStringRes {
75 kEndOfString,
76 kNeedsMoreData,
77 kFatalError,
78 };
ReadOneJsonString(const char * start,const char * end,std::string * key,const char ** next)79 ReadStringRes ReadOneJsonString(const char* start,
80 const char* end,
81 std::string* key,
82 const char** next) {
83 bool is_escaping = false;
84 for (const char* s = start; s < end; s++) {
85 // Control characters are not allowed in JSON strings.
86 if (iscntrl(*s))
87 return ReadStringRes::kFatalError;
88
89 // If we get a quote character end of the string.
90 if (*s == '"' && !is_escaping) {
91 *next = s + 1;
92 return ReadStringRes::kEndOfString;
93 }
94
95 util::Status status = AppendUnescapedCharacter(*s, is_escaping, key);
96 if (!status.ok())
97 return ReadStringRes::kFatalError;
98
99 // If we're in a string and we see a backslash and the last character was
100 // not a backslash the next character is escaped:
101 is_escaping = *s == '\\' && !is_escaping;
102 }
103 return ReadStringRes::kNeedsMoreData;
104 }
105
106 } // namespace
107
ReadOneJsonDict(const char * start,const char * end,base::StringView * value,const char ** next)108 ReadDictRes ReadOneJsonDict(const char* start,
109 const char* end,
110 base::StringView* value,
111 const char** next) {
112 int braces = 0;
113 int square_brackets = 0;
114 const char* dict_begin = nullptr;
115 bool in_string = false;
116 bool is_escaping = false;
117 for (const char* s = start; s < end; s++) {
118 if (isspace(*s) || *s == ',')
119 continue;
120 if (*s == '"' && !is_escaping) {
121 in_string = !in_string;
122 continue;
123 }
124 if (in_string) {
125 // If we're in a string and we see a backslash and the last character was
126 // not a backslash the next character is escaped:
127 is_escaping = *s == '\\' && !is_escaping;
128 // If we're currently parsing a string we should ignore otherwise special
129 // characters:
130 continue;
131 }
132 if (*s == '{') {
133 if (braces == 0)
134 dict_begin = s;
135 braces++;
136 continue;
137 }
138 if (*s == '}') {
139 if (braces <= 0)
140 return ReadDictRes::kEndOfTrace;
141 if (--braces > 0)
142 continue;
143 size_t len = static_cast<size_t>((s + 1) - dict_begin);
144 *value = base::StringView(dict_begin, len);
145 *next = s + 1;
146 return ReadDictRes::kFoundDict;
147 }
148 if (*s == '[') {
149 square_brackets++;
150 continue;
151 }
152 if (*s == ']') {
153 if (square_brackets == 0) {
154 // We've reached the end of [traceEvents] array.
155 // There might be other top level keys in the json (e.g. metadata)
156 // after.
157 *next = s + 1;
158 return ReadDictRes::kEndOfArray;
159 }
160 square_brackets--;
161 }
162 }
163 return ReadDictRes::kNeedsMoreData;
164 }
165
ReadOneJsonKey(const char * start,const char * end,std::string * key,const char ** next)166 ReadKeyRes ReadOneJsonKey(const char* start,
167 const char* end,
168 std::string* key,
169 const char** next) {
170 enum class NextToken {
171 kStringOrEndOfDict,
172 kColon,
173 kValue,
174 };
175
176 NextToken next_token = NextToken::kStringOrEndOfDict;
177 for (const char* s = start; s < end; s++) {
178 // Whitespace characters anywhere can be skipped.
179 if (isspace(*s))
180 continue;
181
182 switch (next_token) {
183 case NextToken::kStringOrEndOfDict: {
184 // If we see a closing brace, that means we've reached the end of the
185 // wrapping dictionary.
186 if (*s == '}') {
187 *next = s + 1;
188 return ReadKeyRes::kEndOfDictionary;
189 }
190
191 // If we see a comma separator, just ignore it.
192 if (*s == ',')
193 continue;
194
195 // If we see anything else but a quote character here, this cannot be a
196 // valid key.
197 if (*s != '"')
198 return ReadKeyRes::kFatalError;
199
200 auto res = ReadOneJsonString(s + 1, end, key, &s);
201 if (res == ReadStringRes::kFatalError)
202 return ReadKeyRes::kFatalError;
203 if (res == ReadStringRes::kNeedsMoreData)
204 return ReadKeyRes::kNeedsMoreData;
205
206 // We need to decrement from the pointer as the loop will increment
207 // it back up.
208 s--;
209 next_token = NextToken::kColon;
210 break;
211 }
212 case NextToken::kColon:
213 if (*s != ':')
214 return ReadKeyRes::kFatalError;
215 next_token = NextToken::kValue;
216 break;
217 case NextToken::kValue:
218 // Allowed value starting chars: [ { digit - "
219 // Also allowed: true, false, null. For simplicities sake, we only check
220 // against the first character as we're not trying to be super accurate.
221 if (*s == '[' || *s == '{' || isdigit(*s) || *s == '-' || *s == '"' ||
222 *s == 't' || *s == 'f' || *s == 'n') {
223 *next = s;
224 return ReadKeyRes::kFoundKey;
225 }
226 return ReadKeyRes::kFatalError;
227 }
228 }
229 return ReadKeyRes::kNeedsMoreData;
230 }
231
ExtractValueForJsonKey(base::StringView dict,const std::string & key,base::Optional<std::string> * value)232 util::Status ExtractValueForJsonKey(base::StringView dict,
233 const std::string& key,
234 base::Optional<std::string>* value) {
235 PERFETTO_DCHECK(dict.size() >= 2);
236
237 const char* start = dict.data();
238 const char* end = dict.data() + dict.size();
239
240 enum ExtractValueState {
241 kBeforeDict,
242 kInsideDict,
243 kAfterDict,
244 };
245
246 ExtractValueState state = kBeforeDict;
247 for (const char* s = start; s < end;) {
248 if (isspace(*s)) {
249 ++s;
250 continue;
251 }
252
253 if (state == kBeforeDict) {
254 if (*s == '{') {
255 ++s;
256 state = kInsideDict;
257 continue;
258 }
259 return util::ErrStatus("Unexpected character before JSON dict");
260 }
261
262 if (state == kAfterDict)
263 return util::ErrStatus("Unexpected character after JSON dict");
264
265 PERFETTO_DCHECK(state == kInsideDict);
266 PERFETTO_DCHECK(s < end);
267
268 if (*s == '}') {
269 ++s;
270 state = kAfterDict;
271 continue;
272 }
273
274 std::string current_key;
275 auto res = ReadOneJsonKey(s, end, ¤t_key, &s);
276 if (res == ReadKeyRes::kEndOfDictionary)
277 break;
278
279 if (res == ReadKeyRes::kFatalError)
280 return util::ErrStatus("Failure parsing JSON: encountered fatal error");
281
282 if (res == ReadKeyRes::kNeedsMoreData) {
283 return util::ErrStatus("Failure parsing JSON: partial JSON dictionary");
284 }
285
286 PERFETTO_DCHECK(res == ReadKeyRes::kFoundKey);
287
288 if (*s == '[') {
289 return util::ErrStatus(
290 "Failure parsing JSON: unsupported JSON dictionary with array");
291 }
292
293 std::string value_str;
294 if (*s == '{') {
295 base::StringView dict_str;
296 ReadDictRes dict_res = ReadOneJsonDict(s, end, &dict_str, &s);
297 if (dict_res == ReadDictRes::kNeedsMoreData ||
298 dict_res == ReadDictRes::kEndOfArray ||
299 dict_res == ReadDictRes::kEndOfTrace) {
300 return util::ErrStatus(
301 "Failure parsing JSON: unable to parse dictionary");
302 }
303 value_str = dict_str.ToStdString();
304 } else if (*s == '"') {
305 auto str_res = ReadOneJsonString(s + 1, end, &value_str, &s);
306 if (str_res == ReadStringRes::kNeedsMoreData ||
307 str_res == ReadStringRes::kFatalError) {
308 return util::ErrStatus("Failure parsing JSON: unable to parse string");
309 }
310 } else {
311 const char* value_start = s;
312 const char* value_end = end;
313 for (; s < end; ++s) {
314 if (*s == ',' || isspace(*s) || *s == '}') {
315 value_end = s;
316 break;
317 }
318 }
319 value_str = std::string(value_start, value_end);
320 }
321
322 if (key == current_key) {
323 *value = value_str;
324 return util::OkStatus();
325 }
326 }
327
328 if (state != kAfterDict)
329 return util::ErrStatus("Failure parsing JSON: malformed dictionary");
330
331 *value = base::nullopt;
332 return util::OkStatus();
333 }
334
ReadOneSystemTraceLine(const char * start,const char * end,std::string * line,const char ** next)335 ReadSystemLineRes ReadOneSystemTraceLine(const char* start,
336 const char* end,
337 std::string* line,
338 const char** next) {
339 bool is_escaping = false;
340 for (const char* s = start; s < end; s++) {
341 // If we get a quote character and we're not escaping, we are done with the
342 // system trace string.
343 if (*s == '"' && !is_escaping) {
344 *next = s + 1;
345 return ReadSystemLineRes::kEndOfSystemTrace;
346 }
347
348 // If we are escaping n, that means this is a new line which is a delimiter
349 // for a system trace line.
350 if (*s == 'n' && is_escaping) {
351 *next = s + 1;
352 return ReadSystemLineRes::kFoundLine;
353 }
354
355 util::Status status = AppendUnescapedCharacter(*s, is_escaping, line);
356 if (!status.ok())
357 return ReadSystemLineRes::kFatalError;
358
359 // If we're in a string and we see a backslash and the last character was
360 // not a backslash the next character is escaped:
361 is_escaping = *s == '\\' && !is_escaping;
362 }
363 return ReadSystemLineRes::kNeedsMoreData;
364 }
365
JsonTraceTokenizer(TraceProcessorContext * ctx)366 JsonTraceTokenizer::JsonTraceTokenizer(TraceProcessorContext* ctx)
367 : context_(ctx) {}
368 JsonTraceTokenizer::~JsonTraceTokenizer() = default;
369
Parse(TraceBlobView blob)370 util::Status JsonTraceTokenizer::Parse(TraceBlobView blob) {
371 PERFETTO_DCHECK(json::IsJsonSupported());
372
373 buffer_.insert(buffer_.end(), blob.data(), blob.data() + blob.size());
374 const char* buf = buffer_.data();
375 const char* next = buf;
376 const char* end = buf + buffer_.size();
377
378 if (offset_ == 0) {
379 // Strip leading whitespace.
380 while (next != end && isspace(*next)) {
381 next++;
382 }
383 if (next == end) {
384 return util::ErrStatus(
385 "Failure parsing JSON: first chunk has only whitespace");
386 }
387
388 // Trace could begin in any of these ways:
389 // {"traceEvents":[{
390 // { "traceEvents": [{
391 // [{
392 if (*next != '{' && *next != '[') {
393 return util::ErrStatus(
394 "Failure parsing JSON: first non-whitespace character is not [ or {");
395 }
396
397 // Figure out the format of the JSON file based on the first non-whitespace
398 // character.
399 format_ = *next == '{' ? TraceFormat::kOuterDictionary
400 : TraceFormat::kOnlyTraceEvents;
401
402 // Skip the '[' or '{' character.
403 next++;
404
405 // Set our current position based on the format of the trace.
406 position_ = format_ == TraceFormat::kOuterDictionary
407 ? TracePosition::kDictionaryKey
408 : TracePosition::kTraceEventsArray;
409 }
410
411 auto status = ParseInternal(next, end, &next);
412 if (!status.ok())
413 return status;
414
415 offset_ += static_cast<uint64_t>(next - buf);
416 buffer_.erase(buffer_.begin(), buffer_.begin() + (next - buf));
417 return util::OkStatus();
418 }
419
ParseInternal(const char * start,const char * end,const char ** out)420 util::Status JsonTraceTokenizer::ParseInternal(const char* start,
421 const char* end,
422 const char** out) {
423 PERFETTO_DCHECK(json::IsJsonSupported());
424 auto* trace_sorter = context_->sorter.get();
425
426 const char* next = start;
427 switch (position_) {
428 case TracePosition::kDictionaryKey: {
429 if (format_ != TraceFormat::kOuterDictionary) {
430 return util::ErrStatus(
431 "Failure parsing JSON: illegal format when parsing dictionary key");
432 }
433
434 std::string key;
435 auto res = ReadOneJsonKey(start, end, &key, &next);
436 if (res == ReadKeyRes::kFatalError)
437 return util::ErrStatus("Failure parsing JSON: encountered fatal error");
438
439 if (res == ReadKeyRes::kEndOfDictionary ||
440 res == ReadKeyRes::kNeedsMoreData) {
441 break;
442 }
443
444 if (key == "traceEvents") {
445 position_ = TracePosition::kTraceEventsArray;
446 return ParseInternal(next + 1, end, out);
447 } else if (key == "systemTraceEvents") {
448 position_ = TracePosition::kSystemTraceEventsString;
449 return ParseInternal(next + 1, end, out);
450 } else if (key == "metadata") {
451 position_ = TracePosition::kWaitingForMetadataDictionary;
452 return ParseInternal(next + 1, end, out);
453 } else if (key == "displayTimeUnit") {
454 std::string time_unit;
455 auto result = ReadOneJsonString(next + 1, end, &time_unit, &next);
456 if (result == ReadStringRes::kFatalError)
457 return util::ErrStatus("Could not parse displayTimeUnit");
458 context_->storage->IncrementStats(stats::json_display_time_unit);
459 return ParseInternal(next, end, out);
460 } else if (key == "otherData") {
461 base::StringView unparsed;
462 const auto other = ReadOneJsonDict(next, end, &unparsed, &next);
463 if (other == ReadDictRes::kEndOfArray)
464 return util::ErrStatus(
465 "Failure parsing JSON: Missing ] in otherData");
466 if (other == ReadDictRes::kEndOfTrace)
467 return util::ErrStatus(
468 "Failure parsing JSON: Failed parsing otherData");
469 if (other == ReadDictRes::kNeedsMoreData)
470 return util::ErrStatus("Failure parsing JSON: otherData too large");
471 return ParseInternal(next, end, out);
472 } else {
473 // If we don't recognize the key, just ignore the rest of the trace and
474 // go to EOF.
475 // TODO(lalitm): do something better here.
476 position_ = TracePosition::kEof;
477 break;
478 }
479 }
480 case TracePosition::kSystemTraceEventsString: {
481 if (format_ != TraceFormat::kOuterDictionary) {
482 return util::ErrStatus(
483 "Failure parsing JSON: illegal format when parsing system events");
484 }
485
486 while (next < end) {
487 std::string raw_line;
488 auto res = ReadOneSystemTraceLine(next, end, &raw_line, &next);
489 if (res == ReadSystemLineRes::kFatalError)
490 return util::ErrStatus(
491 "Failure parsing JSON: encountered fatal error");
492
493 if (res == ReadSystemLineRes::kNeedsMoreData)
494 break;
495
496 if (res == ReadSystemLineRes::kEndOfSystemTrace) {
497 position_ = TracePosition::kDictionaryKey;
498 return ParseInternal(next, end, out);
499 }
500
501 if (base::StartsWith(raw_line, "#") || raw_line.empty())
502 continue;
503
504 std::unique_ptr<SystraceLine> line(new SystraceLine());
505 util::Status status =
506 systrace_line_tokenizer_.Tokenize(raw_line, line.get());
507 if (!status.ok())
508 return status;
509 trace_sorter->PushSystraceLine(std::move(line));
510 }
511 break;
512 }
513 case TracePosition::kWaitingForMetadataDictionary: {
514 if (format_ != TraceFormat::kOuterDictionary) {
515 return util::ErrStatus(
516 "Failure parsing JSON: illegal format when parsing metadata");
517 }
518
519 base::StringView unparsed;
520 const auto res = ReadOneJsonDict(next, end, &unparsed, &next);
521 if (res == ReadDictRes::kEndOfArray)
522 return util::ErrStatus("Failure parsing JSON: encountered fatal error");
523 if (res == ReadDictRes::kEndOfTrace ||
524 res == ReadDictRes::kNeedsMoreData) {
525 break;
526 }
527
528 // TODO(lalitm): read and ingest the relevant data inside |value|.
529 position_ = TracePosition::kDictionaryKey;
530 break;
531 }
532 case TracePosition::kTraceEventsArray: {
533 while (next < end) {
534 base::StringView unparsed;
535 const auto res = ReadOneJsonDict(next, end, &unparsed, &next);
536 if (res == ReadDictRes::kEndOfTrace ||
537 res == ReadDictRes::kNeedsMoreData) {
538 break;
539 }
540
541 if (res == ReadDictRes::kEndOfArray) {
542 position_ = format_ == TraceFormat::kOuterDictionary
543 ? TracePosition::kDictionaryKey
544 : TracePosition::kEof;
545 break;
546 }
547
548 base::Optional<std::string> opt_raw_ts;
549 RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ts", &opt_raw_ts));
550 base::Optional<int64_t> opt_ts =
551 opt_raw_ts ? json::CoerceToTs(*opt_raw_ts) : base::nullopt;
552 int64_t ts = 0;
553 if (opt_ts.has_value()) {
554 ts = opt_ts.value();
555 } else {
556 // Metadata events may omit ts. In all other cases error:
557 base::Optional<std::string> opt_raw_ph;
558 RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ph", &opt_raw_ph));
559 if (!opt_raw_ph || *opt_raw_ph != "M") {
560 context_->storage->IncrementStats(stats::json_tokenizer_failure);
561 continue;
562 }
563 }
564 trace_sorter->PushJsonValue(ts, unparsed.ToStdString());
565 }
566 break;
567 }
568 case TracePosition::kEof: {
569 break;
570 }
571 }
572 *out = next;
573 return util::OkStatus();
574 }
575
NotifyEndOfFile()576 void JsonTraceTokenizer::NotifyEndOfFile() {}
577
578 } // namespace trace_processor
579 } // namespace perfetto
580