1 /* 2 * Copyright (C) 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef RegexPattern_h 27 #define RegexPattern_h 28 29 #include <wtf/Platform.h> 30 31 #if ENABLE(YARR) 32 33 #include <wtf/Vector.h> 34 #include <wtf/unicode/Unicode.h> 35 36 37 namespace JSC { namespace Yarr { 38 39 #define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. 40 #define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers. 41 #define RegexStackSpaceForBackTrackInfoBackReference 2 42 #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative. 43 #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1 44 #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. 45 #define RegexStackSpaceForBackTrackInfoParentheses 4 46 47 struct PatternDisjunction; 48 49 struct CharacterRange { 50 UChar begin; 51 UChar end; 52 CharacterRangeCharacterRange53 CharacterRange(UChar begin, UChar end) 54 : begin(begin) 55 , end(end) 56 { 57 } 58 }; 59 60 struct CharacterClass : FastAllocBase { 61 Vector<UChar> m_matches; 62 Vector<CharacterRange> m_ranges; 63 Vector<UChar> m_matchesUnicode; 64 Vector<CharacterRange> m_rangesUnicode; 65 }; 66 67 enum QuantifierType { 68 QuantifierFixedCount, 69 QuantifierGreedy, 70 QuantifierNonGreedy, 71 }; 72 73 struct PatternTerm { 74 enum Type { 75 TypeAssertionBOL, 76 TypeAssertionEOL, 77 TypeAssertionWordBoundary, 78 TypePatternCharacter, 79 TypeCharacterClass, 80 TypeBackReference, 81 TypeForwardReference, 82 TypeParenthesesSubpattern, 83 TypeParentheticalAssertion, 84 } type; 85 bool invertOrCapture; 86 union { 87 UChar patternCharacter; 88 CharacterClass* characterClass; 89 unsigned subpatternId; 90 struct { 91 PatternDisjunction* disjunction; 92 unsigned subpatternId; 93 unsigned lastSubpatternId; 94 bool isCopy; 95 } parentheses; 96 }; 97 QuantifierType quantityType; 98 unsigned quantityCount; 99 int inputPosition; 100 unsigned frameLocation; 101 PatternTermPatternTerm102 PatternTerm(UChar ch) 103 : type(PatternTerm::TypePatternCharacter) 104 { 105 patternCharacter = ch; 106 quantityType = QuantifierFixedCount; 107 quantityCount = 1; 108 } 109 PatternTermPatternTerm110 PatternTerm(CharacterClass* charClass, bool invert) 111 : type(PatternTerm::TypeCharacterClass) 112 , invertOrCapture(invert) 113 { 114 characterClass = charClass; 115 quantityType = QuantifierFixedCount; 116 quantityCount = 1; 117 } 118 PatternTermPatternTerm119 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture) 120 : type(type) 121 , invertOrCapture(invertOrCapture) 122 { 123 parentheses.disjunction = disjunction; 124 parentheses.subpatternId = subpatternId; 125 parentheses.isCopy = false; 126 quantityType = QuantifierFixedCount; 127 quantityCount = 1; 128 } 129 130 PatternTerm(Type type, bool invert = false) typePatternTerm131 : type(type) 132 , invertOrCapture(invert) 133 { 134 quantityType = QuantifierFixedCount; 135 quantityCount = 1; 136 } 137 PatternTermPatternTerm138 PatternTerm(unsigned spatternId) 139 : type(TypeBackReference) 140 , invertOrCapture(invertOrCapture) 141 { 142 subpatternId = spatternId; 143 quantityType = QuantifierFixedCount; 144 quantityCount = 1; 145 } 146 ForwardReferencePatternTerm147 static PatternTerm ForwardReference() 148 { 149 return PatternTerm(TypeForwardReference); 150 } 151 BOLPatternTerm152 static PatternTerm BOL() 153 { 154 return PatternTerm(TypeAssertionBOL); 155 } 156 EOLPatternTerm157 static PatternTerm EOL() 158 { 159 return PatternTerm(TypeAssertionEOL); 160 } 161 WordBoundaryPatternTerm162 static PatternTerm WordBoundary(bool invert) 163 { 164 return PatternTerm(TypeAssertionWordBoundary, invert); 165 } 166 invertPatternTerm167 bool invert() 168 { 169 return invertOrCapture; 170 } 171 capturePatternTerm172 bool capture() 173 { 174 return invertOrCapture; 175 } 176 quantifyPatternTerm177 void quantify(unsigned count, QuantifierType type) 178 { 179 quantityCount = count; 180 quantityType = type; 181 } 182 }; 183 184 struct PatternAlternative : FastAllocBase { PatternAlternativePatternAlternative185 PatternAlternative(PatternDisjunction* disjunction) 186 : m_parent(disjunction) 187 { 188 } 189 lastTermPatternAlternative190 PatternTerm& lastTerm() 191 { 192 ASSERT(m_terms.size()); 193 return m_terms[m_terms.size() - 1]; 194 } 195 removeLastTermPatternAlternative196 void removeLastTerm() 197 { 198 ASSERT(m_terms.size()); 199 m_terms.shrink(m_terms.size() - 1); 200 } 201 202 Vector<PatternTerm> m_terms; 203 PatternDisjunction* m_parent; 204 unsigned m_minimumSize; 205 bool m_hasFixedSize; 206 }; 207 208 struct PatternDisjunction : FastAllocBase { 209 PatternDisjunction(PatternAlternative* parent = 0) m_parentPatternDisjunction210 : m_parent(parent) 211 { 212 } 213 ~PatternDisjunctionPatternDisjunction214 ~PatternDisjunction() 215 { 216 deleteAllValues(m_alternatives); 217 } 218 addNewAlternativePatternDisjunction219 PatternAlternative* addNewAlternative() 220 { 221 PatternAlternative* alternative = new PatternAlternative(this); 222 m_alternatives.append(alternative); 223 return alternative; 224 } 225 226 Vector<PatternAlternative*> m_alternatives; 227 PatternAlternative* m_parent; 228 unsigned m_minimumSize; 229 unsigned m_callFrameSize; 230 bool m_hasFixedSize; 231 }; 232 233 // You probably don't want to be calling these functions directly 234 // (please to be calling newlineCharacterClass() et al on your 235 // friendly neighborhood RegexPattern instance to get nicely 236 // cached copies). 237 CharacterClass* newlineCreate(); 238 CharacterClass* digitsCreate(); 239 CharacterClass* spacesCreate(); 240 CharacterClass* wordcharCreate(); 241 CharacterClass* nondigitsCreate(); 242 CharacterClass* nonspacesCreate(); 243 CharacterClass* nonwordcharCreate(); 244 245 struct RegexPattern { RegexPatternRegexPattern246 RegexPattern(bool ignoreCase, bool multiline) 247 : m_ignoreCase(ignoreCase) 248 , m_multiline(multiline) 249 , m_numSubpatterns(0) 250 , m_maxBackReference(0) 251 , newlineCached(0) 252 , digitsCached(0) 253 , spacesCached(0) 254 , wordcharCached(0) 255 , nondigitsCached(0) 256 , nonspacesCached(0) 257 , nonwordcharCached(0) 258 { 259 } 260 ~RegexPatternRegexPattern261 ~RegexPattern() 262 { 263 deleteAllValues(m_disjunctions); 264 deleteAllValues(m_userCharacterClasses); 265 } 266 resetRegexPattern267 void reset() 268 { 269 m_numSubpatterns = 0; 270 m_maxBackReference = 0; 271 272 newlineCached = 0; 273 digitsCached = 0; 274 spacesCached = 0; 275 wordcharCached = 0; 276 nondigitsCached = 0; 277 nonspacesCached = 0; 278 nonwordcharCached = 0; 279 280 deleteAllValues(m_disjunctions); 281 m_disjunctions.clear(); 282 deleteAllValues(m_userCharacterClasses); 283 m_userCharacterClasses.clear(); 284 } 285 containsIllegalBackReferenceRegexPattern286 bool containsIllegalBackReference() 287 { 288 return m_maxBackReference > m_numSubpatterns; 289 } 290 newlineCharacterClassRegexPattern291 CharacterClass* newlineCharacterClass() 292 { 293 if (!newlineCached) 294 m_userCharacterClasses.append(newlineCached = newlineCreate()); 295 return newlineCached; 296 } digitsCharacterClassRegexPattern297 CharacterClass* digitsCharacterClass() 298 { 299 if (!digitsCached) 300 m_userCharacterClasses.append(digitsCached = digitsCreate()); 301 return digitsCached; 302 } spacesCharacterClassRegexPattern303 CharacterClass* spacesCharacterClass() 304 { 305 if (!spacesCached) 306 m_userCharacterClasses.append(spacesCached = spacesCreate()); 307 return spacesCached; 308 } wordcharCharacterClassRegexPattern309 CharacterClass* wordcharCharacterClass() 310 { 311 if (!wordcharCached) 312 m_userCharacterClasses.append(wordcharCached = wordcharCreate()); 313 return wordcharCached; 314 } nondigitsCharacterClassRegexPattern315 CharacterClass* nondigitsCharacterClass() 316 { 317 if (!nondigitsCached) 318 m_userCharacterClasses.append(nondigitsCached = nondigitsCreate()); 319 return nondigitsCached; 320 } nonspacesCharacterClassRegexPattern321 CharacterClass* nonspacesCharacterClass() 322 { 323 if (!nonspacesCached) 324 m_userCharacterClasses.append(nonspacesCached = nonspacesCreate()); 325 return nonspacesCached; 326 } nonwordcharCharacterClassRegexPattern327 CharacterClass* nonwordcharCharacterClass() 328 { 329 if (!nonwordcharCached) 330 m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate()); 331 return nonwordcharCached; 332 } 333 334 bool m_ignoreCase; 335 bool m_multiline; 336 unsigned m_numSubpatterns; 337 unsigned m_maxBackReference; 338 PatternDisjunction* m_body; 339 Vector<PatternDisjunction*, 4> m_disjunctions; 340 Vector<CharacterClass*> m_userCharacterClasses; 341 342 private: 343 CharacterClass* newlineCached; 344 CharacterClass* digitsCached; 345 CharacterClass* spacesCached; 346 CharacterClass* wordcharCached; 347 CharacterClass* nondigitsCached; 348 CharacterClass* nonspacesCached; 349 CharacterClass* nonwordcharCached; 350 }; 351 352 } } // namespace JSC::Yarr 353 354 #endif 355 356 #endif // RegexPattern_h 357