1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Sanjay Ghemawat
31
32 #include <vector>
33 #include <assert.h>
34
35 #include "pcrecpp_internal.h"
36 #include "pcre_scanner.h"
37
38 using std::vector;
39
40 namespace pcrecpp {
41
Scanner()42 Scanner::Scanner()
43 : data_(),
44 input_(data_),
45 skip_(NULL),
46 should_skip_(false),
47 skip_repeat_(false),
48 save_comments_(false),
49 comments_(NULL),
50 comments_offset_(0) {
51 }
52
Scanner(const string & in)53 Scanner::Scanner(const string& in)
54 : data_(in),
55 input_(data_),
56 skip_(NULL),
57 should_skip_(false),
58 skip_repeat_(false),
59 save_comments_(false),
60 comments_(NULL),
61 comments_offset_(0) {
62 }
63
~Scanner()64 Scanner::~Scanner() {
65 delete skip_;
66 delete comments_;
67 }
68
SetSkipExpression(const char * re)69 void Scanner::SetSkipExpression(const char* re) {
70 delete skip_;
71 if (re != NULL) {
72 skip_ = new RE(re);
73 should_skip_ = true;
74 skip_repeat_ = true;
75 ConsumeSkip();
76 } else {
77 skip_ = NULL;
78 should_skip_ = false;
79 skip_repeat_ = false;
80 }
81 }
82
Skip(const char * re)83 void Scanner::Skip(const char* re) {
84 delete skip_;
85 if (re != NULL) {
86 skip_ = new RE(re);
87 should_skip_ = true;
88 skip_repeat_ = false;
89 ConsumeSkip();
90 } else {
91 skip_ = NULL;
92 should_skip_ = false;
93 skip_repeat_ = false;
94 }
95 }
96
DisableSkip()97 void Scanner::DisableSkip() {
98 assert(skip_ != NULL);
99 should_skip_ = false;
100 }
101
EnableSkip()102 void Scanner::EnableSkip() {
103 assert(skip_ != NULL);
104 should_skip_ = true;
105 ConsumeSkip();
106 }
107
LineNumber() const108 int Scanner::LineNumber() const {
109 // TODO: Make it more efficient by keeping track of the last point
110 // where we computed line numbers and counting newlines since then.
111 // We could use std:count, but not all systems have it. :-(
112 int count = 1;
113 for (const char* p = data_.data(); p < input_.data(); ++p)
114 if (*p == '\n')
115 ++count;
116 return count;
117 }
118
Offset() const119 int Scanner::Offset() const {
120 return (int)(input_.data() - data_.c_str());
121 }
122
LookingAt(const RE & re) const123 bool Scanner::LookingAt(const RE& re) const {
124 int consumed;
125 return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
126 }
127
128
129 // helper function to consume *skip_ and honour save_comments_
ConsumeSkip()130 void Scanner::ConsumeSkip() {
131 const char* start_data = input_.data();
132 while (skip_->Consume(&input_)) {
133 if (!skip_repeat_) {
134 // Only one skip allowed.
135 break;
136 }
137 }
138 if (save_comments_) {
139 if (comments_ == NULL) {
140 comments_ = new vector<StringPiece>;
141 }
142 // already pointing one past end, so no need to +1
143 int length = (int)(input_.data() - start_data);
144 if (length > 0) {
145 comments_->push_back(StringPiece(start_data, length));
146 }
147 }
148 }
149
150
GetComments(int start,int end,vector<StringPiece> * ranges)151 void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
152 // short circuit out if we've not yet initialized comments_
153 // (e.g., when save_comments is false)
154 if (!comments_) {
155 return;
156 }
157 // TODO: if we guarantee that comments_ will contain StringPieces
158 // that are ordered by their start, then we can do a binary search
159 // for the first StringPiece at or past start and then scan for the
160 // ones contained in the range, quit early (use equal_range or
161 // lower_bound)
162 for (vector<StringPiece>::const_iterator it = comments_->begin();
163 it != comments_->end(); ++it) {
164 if ((it->data() >= data_.c_str() + start &&
165 it->data() + it->size() <= data_.c_str() + end)) {
166 ranges->push_back(*it);
167 }
168 }
169 }
170
171
GetNextComments(vector<StringPiece> * ranges)172 void Scanner::GetNextComments(vector<StringPiece> *ranges) {
173 // short circuit out if we've not yet initialized comments_
174 // (e.g., when save_comments is false)
175 if (!comments_) {
176 return;
177 }
178 for (vector<StringPiece>::const_iterator it =
179 comments_->begin() + comments_offset_;
180 it != comments_->end(); ++it) {
181 ranges->push_back(*it);
182 ++comments_offset_;
183 }
184 }
185
186 } // namespace pcrecpp
187