1 /*
2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Collabora Ltd.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "RegularExpression.h"
29
30 #include "Logging.h"
31 #include <pcre/pcre.h>
32
33 namespace WebCore {
34
35 class RegularExpression::Private : public RefCounted<Private> {
36 public:
37 static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity);
38 ~Private();
39
regexp() const40 JSRegExp* regexp() const { return m_regexp; }
41 int lastMatchLength;
42
43 private:
44 Private(const String& pattern, TextCaseSensitivity);
45 static JSRegExp* compile(const String& pattern, TextCaseSensitivity);
46
47 JSRegExp* m_regexp;
48 };
49
compile(const String & pattern,TextCaseSensitivity caseSensitivity)50 inline JSRegExp* RegularExpression::Private::compile(const String& pattern, TextCaseSensitivity caseSensitivity)
51 {
52 const char* errorMessage;
53 JSRegExp* regexp = jsRegExpCompile(pattern.characters(), pattern.length(),
54 caseSensitivity == TextCaseSensitive ? JSRegExpDoNotIgnoreCase : JSRegExpIgnoreCase, JSRegExpSingleLine,
55 0, &errorMessage);
56 if (!regexp)
57 LOG_ERROR("RegularExpression: pcre_compile failed with '%s'", errorMessage);
58 return regexp;
59 }
60
Private(const String & pattern,TextCaseSensitivity caseSensitivity)61 inline RegularExpression::Private::Private(const String& pattern, TextCaseSensitivity caseSensitivity)
62 : lastMatchLength(-1)
63 , m_regexp(compile(pattern, caseSensitivity))
64 {
65 }
66
create(const String & pattern,TextCaseSensitivity caseSensitivity)67 inline PassRefPtr<RegularExpression::Private> RegularExpression::Private::create(const String& pattern, TextCaseSensitivity caseSensitivity)
68 {
69 return adoptRef(new Private(pattern, caseSensitivity));
70 }
71
~Private()72 RegularExpression::Private::~Private()
73 {
74 jsRegExpFree(m_regexp);
75 }
76
RegularExpression(const String & pattern,TextCaseSensitivity caseSensitivity)77 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity)
78 : d(Private::create(pattern, caseSensitivity))
79 {
80 }
81
// Copy constructor: takes over |re|'s handle to its private state instead of
// compiling the pattern a second time.
RegularExpression::RegularExpression(const RegularExpression& re)
    : d(re.d)
{
}
86
~RegularExpression()87 RegularExpression::~RegularExpression()
88 {
89 }
90
// Makes this object refer to the same private state as |re|; the pattern is
// not recompiled. Returns *this so assignments can be chained.
RegularExpression& RegularExpression::operator=(const RegularExpression& re)
{
    d = re.d;
    return *this;
}
96
match(const String & str,int startFrom,int * matchLength) const97 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
98 {
99 if (!d->regexp())
100 return -1;
101
102 if (str.isNull())
103 return -1;
104
105 // First 2 offsets are start and end offsets; 3rd entry is used internally by pcre
106 static const size_t maxOffsets = 3;
107 int offsets[maxOffsets];
108 int result = jsRegExpExecute(d->regexp(), str.characters(), str.length(), startFrom, offsets, maxOffsets);
109 if (result < 0) {
110 if (result != JSRegExpErrorNoMatch)
111 LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", result);
112 d->lastMatchLength = -1;
113 return -1;
114 }
115
116 // 1 means 1 match; 0 means more than one match. First match is recorded in offsets.
117 d->lastMatchLength = offsets[1] - offsets[0];
118 if (matchLength)
119 *matchLength = d->lastMatchLength;
120 return offsets[0];
121 }
122
searchRev(const String & str) const123 int RegularExpression::searchRev(const String& str) const
124 {
125 // FIXME: This could be faster if it actually searched backwards.
126 // Instead, it just searches forwards, multiple times until it finds the last match.
127
128 int start = 0;
129 int pos;
130 int lastPos = -1;
131 int lastMatchLength = -1;
132 do {
133 int matchLength;
134 pos = match(str, start, &matchLength);
135 if (pos >= 0) {
136 if (pos + matchLength > lastPos + lastMatchLength) {
137 // replace last match if this one is later and not a subset of the last match
138 lastPos = pos;
139 lastMatchLength = matchLength;
140 }
141 start = pos + 1;
142 }
143 } while (pos != -1);
144 d->lastMatchLength = lastMatchLength;
145 return lastPos;
146 }
147
matchedLength() const148 int RegularExpression::matchedLength() const
149 {
150 return d->lastMatchLength;
151 }
152
replace(String & string,const RegularExpression & target,const String & replacement)153 void replace(String& string, const RegularExpression& target, const String& replacement)
154 {
155 int index = 0;
156 while (index < static_cast<int>(string.length())) {
157 int matchLength;
158 index = target.match(string, index, &matchLength);
159 if (index < 0)
160 break;
161 string.replace(index, matchLength, replacement);
162 index += replacement.length();
163 if (!matchLength)
164 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
165 }
166 }
167
168 } // namespace WebCore
169