1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/tools/profile_reset/jtl_parser.h"
6
7 #include <algorithm>
8
9 #include "base/logging.h"
10 #include "third_party/re2/re2/re2.h"
11
namespace {

// Pattern that consumes exactly one line from the front of a text. The only
// capture group holds the content of the line; a trailing "//" comment and
// the terminating EOL character are matched but not captured.
// Caveat: the match fails when the line contains an unbalanced double-quote
// outside of a comment.
const char kSingleLineWithMaybeCommentsRE[] =
    // Capture, non-greedily, a run of items where each item is either a whole
    // double-quoted string literal or one character that is neither a
    // double-quote nor a newline.
    "^((?:\"[^\"\\n]*\"|[^\"\\n])*?)"
    // Optionally swallow a comment, greedily.
    "(?://.*)?"
    // Swallow the EOL character, or otherwise require end-of-string.
    "(?:\n|$)";

// Pattern matching either a whole double-quoted string literal (captured) or
// one whitespace character (not captured). Replacing every non-overlapping
// match with the rewrite below strips whitespace outside of string literals.
const char kRemoveWhitespaceRE[] = "(\"[^\"]*\")|\\s";

// Rewrite string paired with |kRemoveWhitespaceRE|. It only back-references
// the string literal capture, so a matched whitespace character is dropped.
const char kRemoveWhitespaceRewrite[] = "\\1";

// The separator that marks the end of a sentence.
const char kEndOfSentenceSeparator[] = ";";

// Spelling of the Boolean 'true' keyword.
const char kTrueKeyword[] = "true";

// Pattern matching a single argument: a double-quoted string (its content is
// captured in the first group) or a Boolean keyword (captured in the second
// group). The argument must be followed by a comma, which is consumed, or by
// the end of the text.
const char kSingleArgumentRE[] = "(?:(?:\"([^\"]*)\"|(true|false))(?:,|$))";

// The three patterns below are meant to be concatenated in this order. The
// result matches one operation and captures its name, the raw text of its
// optional argument list, and the separator that follows it.
const char kOperationNameRE[] = "([[:word:]]+)";
const char kMaybeArgumentListRE[] =
    "(?:\\("                    // Opening parenthesis.
    "((?:\"[^\"]*\"|[^\")])*)"  // Capture: the quote-aware content in between.
    "\\))?";                    // Closing parenthesis; the whole is optional.
const char kOperationSeparatorRE[] = "(;|\\.)";

}  // namespace
56
57 struct JtlParser::ParsingState {
ParsingStateJtlParser::ParsingState58 explicit ParsingState(const re2::StringPiece& compacted_source)
59 : single_operation_regex(std::string(kOperationNameRE) +
60 kMaybeArgumentListRE +
61 kOperationSeparatorRE),
62 single_argument_regex(kSingleArgumentRE),
63 remaining_compacted_source(compacted_source),
64 last_line_number(0) {}
65
66 RE2 single_operation_regex;
67 RE2 single_argument_regex;
68 re2::StringPiece remaining_compacted_source;
69 re2::StringPiece last_context;
70 size_t last_line_number;
71 };
72
JtlParser(const std::string & compacted_source_code,const std::vector<size_t> & newline_indices)73 JtlParser::JtlParser(const std::string& compacted_source_code,
74 const std::vector<size_t>& newline_indices)
75 : compacted_source_(compacted_source_code),
76 newline_indices_(newline_indices) {
77 state_.reset(new ParsingState(compacted_source_));
78 }
79
~JtlParser()80 JtlParser::~JtlParser() {}
81
82 // static
RemoveCommentsAndAllWhitespace(const std::string & verbose_text,std::string * compacted_text,std::vector<size_t> * newline_indices,size_t * error_line_number)83 bool JtlParser::RemoveCommentsAndAllWhitespace(
84 const std::string& verbose_text,
85 std::string* compacted_text,
86 std::vector<size_t>* newline_indices,
87 size_t* error_line_number) {
88 DCHECK(compacted_text);
89 DCHECK(newline_indices);
90 std::string line;
91 RE2 single_line_regex(kSingleLineWithMaybeCommentsRE);
92 RE2 remove_whitespace_regex(kRemoveWhitespaceRE);
93 re2::StringPiece verbose_text_piece(verbose_text);
94 compacted_text->clear();
95 newline_indices->clear();
96 while (!verbose_text_piece.empty()) {
97 if (!RE2::Consume(&verbose_text_piece, single_line_regex, &line)) {
98 if (error_line_number)
99 *error_line_number = newline_indices->size();
100 return false;
101 }
102 RE2::GlobalReplace(
103 &line, remove_whitespace_regex, kRemoveWhitespaceRewrite);
104 *compacted_text += line;
105 newline_indices->push_back(compacted_text->size());
106 }
107 return true;
108 }
109
HasFinished()110 bool JtlParser::HasFinished() {
111 return state_->remaining_compacted_source.empty();
112 }
113
ParseNextOperation(std::string * name,base::ListValue * argument_list,bool * ends_sentence)114 bool JtlParser::ParseNextOperation(std::string* name,
115 base::ListValue* argument_list,
116 bool* ends_sentence) {
117 DCHECK(name);
118 DCHECK(argument_list);
119 DCHECK(ends_sentence);
120
121 state_->last_context = state_->remaining_compacted_source;
122 state_->last_line_number = GetOriginalLineNumber(
123 compacted_source_.size() - state_->remaining_compacted_source.length());
124
125 std::string arguments, separator;
126 if (!RE2::Consume(&state_->remaining_compacted_source,
127 state_->single_operation_regex,
128 name,
129 &arguments,
130 &separator))
131 return false;
132
133 *ends_sentence = (separator == kEndOfSentenceSeparator);
134 state_->last_context.remove_suffix(state_->remaining_compacted_source.size());
135
136 re2::StringPiece arguments_piece(arguments);
137 std::string string_value, boolean_value;
138 while (!arguments_piece.empty()) {
139 if (!RE2::Consume(&arguments_piece,
140 state_->single_argument_regex,
141 &string_value,
142 &boolean_value))
143 return false;
144
145 if (!boolean_value.empty()) {
146 argument_list->Append(
147 new base::FundamentalValue(boolean_value == kTrueKeyword));
148 } else {
149 // |string_value| might be empty for an empty string
150 argument_list->Append(new StringValue(string_value));
151 }
152 }
153 return true;
154 }
155
GetOriginalLineNumber(size_t compacted_index) const156 size_t JtlParser::GetOriginalLineNumber(size_t compacted_index) const {
157 return static_cast<size_t>(std::upper_bound(newline_indices_.begin(),
158 newline_indices_.end(),
159 compacted_index) -
160 newline_indices_.begin());
161 }
162
GetLastLineNumber() const163 size_t JtlParser::GetLastLineNumber() const { return state_->last_line_number; }
164
GetLastContext() const165 std::string JtlParser::GetLastContext() const {
166 return state_->last_context.ToString();
167 }
168