// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/tools/profile_reset/jtl_parser.h"

#include <algorithm>
#include <string>
#include <vector>

#include "base/logging.h"
#include "third_party/re2/re2/re2.h"

namespace {

// RegEx that matches the first line of a text. Will throw away any potential
// double-slash-introduced comments and the potential trailing EOL character.
// Only the text before the comment is captured (capture group 1).
// Note: will fail in case the first line contains an unmatched double-quote
// outside of comments.
const char kSingleLineWithMaybeCommentsRE[] =
    // Non-greedily match and capture sequences of 1.) string literals inside
    // correctly matched double-quotes, or 2.) any other character. Neither
    // alternative may span a newline, so the match stays on the first line.
    "^((?:\"[^\"\\n]*\"|[^\"\\n])*?)"
    // Greedily match and throw away the potential comment.
    "(?://.*)?"
    // Match and throw away EOL, or match end-of-string. Here the C++ escape
    // puts a literal newline character into the pattern, whereas the character
    // classes above contain the two-character regex escape; both denote a
    // newline.
    "(?:\n|$)";

// RegEx to match either a double-quote-enclosed string literal or a whitespace.
// Applied repeatedly and without overlapping, can be used to remove whitespace
// outside of string literals. The string literal is capture group 1; a
// whitespace match leaves that group empty.
const char kRemoveWhitespaceRE[] = "(\"[^\"]*\")|\\s";

// The substitution pattern to use together with the above when replacing. As
// the whitespace is not back-referenced here, it will get removed, while string
// literals are replaced by themselves.
const char kRemoveWhitespaceRewrite[] = "\\1";

// Separator to terminate a sentence.
const char kEndOfSentenceSeparator[] = ";";

// The 'true' Boolean keyword.
const char kTrueKeyword[] = "true";

// RegEx that matches and captures one argument, which is either a double-quote
// enclosed string, or a Boolean value. Will throw away a trailing comma.
// Capture group 1 is the string contents without the quotes; capture group 2
// is the Boolean keyword. Exactly one of the two groups takes part in a match.
const char kSingleArgumentRE[] = "(?:(?:\"([^\"]*)\"|(true|false))(?:,|$))";

// RegEx-es that, when concatenated, will match a single operation, and capture
// the: operation name, the optional arguments, and the separator that follows.
const char kOperationNameRE[] = "([[:word:]]+)";
const char kMaybeArgumentListRE[] =
    "(?:\\("                    // Opening parenthesis.
    "((?:\"[^\"]*\"|[^\")])*)"  // Capture: anything inside, quote-aware.
    "\\))?";                    // Closing parenthesis + everything optional.
// Either the sentence terminator (see kEndOfSentenceSeparator) or a period.
const char kOperationSeparatorRE[] = "(;|\\.)";

}  // namespace

57 struct JtlParser::ParsingState {
ParsingStateJtlParser::ParsingState58   explicit ParsingState(const re2::StringPiece& compacted_source)
59       : single_operation_regex(std::string(kOperationNameRE) +
60                                kMaybeArgumentListRE +
61                                kOperationSeparatorRE),
62         single_argument_regex(kSingleArgumentRE),
63         remaining_compacted_source(compacted_source),
64         last_line_number(0) {}
65 
66   RE2 single_operation_regex;
67   RE2 single_argument_regex;
68   re2::StringPiece remaining_compacted_source;
69   re2::StringPiece last_context;
70   size_t last_line_number;
71 };
72 
JtlParser(const std::string & compacted_source_code,const std::vector<size_t> & newline_indices)73 JtlParser::JtlParser(const std::string& compacted_source_code,
74                      const std::vector<size_t>& newline_indices)
75     : compacted_source_(compacted_source_code),
76       newline_indices_(newline_indices) {
77   state_.reset(new ParsingState(compacted_source_));
78 }
79 
~JtlParser()80 JtlParser::~JtlParser() {}
81 
82 // static
RemoveCommentsAndAllWhitespace(const std::string & verbose_text,std::string * compacted_text,std::vector<size_t> * newline_indices,size_t * error_line_number)83 bool JtlParser::RemoveCommentsAndAllWhitespace(
84     const std::string& verbose_text,
85     std::string* compacted_text,
86     std::vector<size_t>* newline_indices,
87     size_t* error_line_number) {
88   DCHECK(compacted_text);
89   DCHECK(newline_indices);
90   std::string line;
91   RE2 single_line_regex(kSingleLineWithMaybeCommentsRE);
92   RE2 remove_whitespace_regex(kRemoveWhitespaceRE);
93   re2::StringPiece verbose_text_piece(verbose_text);
94   compacted_text->clear();
95   newline_indices->clear();
96   while (!verbose_text_piece.empty()) {
97     if (!RE2::Consume(&verbose_text_piece, single_line_regex, &line)) {
98       if (error_line_number)
99         *error_line_number = newline_indices->size();
100       return false;
101     }
102     RE2::GlobalReplace(
103         &line, remove_whitespace_regex, kRemoveWhitespaceRewrite);
104     *compacted_text += line;
105     newline_indices->push_back(compacted_text->size());
106   }
107   return true;
108 }
109 
HasFinished()110 bool JtlParser::HasFinished() {
111   return state_->remaining_compacted_source.empty();
112 }
113 
ParseNextOperation(std::string * name,base::ListValue * argument_list,bool * ends_sentence)114 bool JtlParser::ParseNextOperation(std::string* name,
115                                    base::ListValue* argument_list,
116                                    bool* ends_sentence) {
117   DCHECK(name);
118   DCHECK(argument_list);
119   DCHECK(ends_sentence);
120 
121   state_->last_context = state_->remaining_compacted_source;
122   state_->last_line_number = GetOriginalLineNumber(
123       compacted_source_.size() - state_->remaining_compacted_source.length());
124 
125   std::string arguments, separator;
126   if (!RE2::Consume(&state_->remaining_compacted_source,
127                     state_->single_operation_regex,
128                     name,
129                     &arguments,
130                     &separator))
131     return false;
132 
133   *ends_sentence = (separator == kEndOfSentenceSeparator);
134   state_->last_context.remove_suffix(state_->remaining_compacted_source.size());
135 
136   re2::StringPiece arguments_piece(arguments);
137   std::string string_value, boolean_value;
138   while (!arguments_piece.empty()) {
139     if (!RE2::Consume(&arguments_piece,
140                       state_->single_argument_regex,
141                       &string_value,
142                       &boolean_value))
143       return false;
144 
145     if (!boolean_value.empty()) {
146       argument_list->Append(
147           new base::FundamentalValue(boolean_value == kTrueKeyword));
148     } else {
149       // |string_value| might be empty for an empty string
150       argument_list->Append(new base::StringValue(string_value));
151     }
152   }
153   return true;
154 }
155 
GetOriginalLineNumber(size_t compacted_index) const156 size_t JtlParser::GetOriginalLineNumber(size_t compacted_index) const {
157   return static_cast<size_t>(std::upper_bound(newline_indices_.begin(),
158                                               newline_indices_.end(),
159                                               compacted_index) -
160                              newline_indices_.begin());
161 }
162 
GetLastLineNumber() const163 size_t JtlParser::GetLastLineNumber() const { return state_->last_line_number; }
164 
GetLastContext() const165 std::string JtlParser::GetLastContext() const {
166   return state_->last_context.ToString();
167 }
168