1 /* 2 Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 3 4 This library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU Library General Public 6 License as published by the Free Software Foundation; either 7 version 2 of the License, or (at your option) any later version. 8 9 This library is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Library General Public License for more details. 13 14 You should have received a copy of the GNU Library General Public License 15 along with this library; see the file COPYING.LIB. If not, write to 16 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 17 Boston, MA 02110-1301, USA. 18 */ 19 20 #ifndef SegmentedString_h 21 #define SegmentedString_h 22 23 #include "PlatformString.h" 24 #include <wtf/Deque.h> 25 #include <wtf/text/TextPosition.h> 26 27 namespace WebCore { 28 29 class SegmentedString; 30 31 class SegmentedSubstring { 32 public: SegmentedSubstring()33 SegmentedSubstring() 34 : m_length(0) 35 , m_current(0) 36 , m_doNotExcludeLineNumbers(true) 37 { 38 } 39 SegmentedSubstring(const String & str)40 SegmentedSubstring(const String& str) 41 : m_length(str.length()) 42 , m_current(str.isEmpty() ? 0 : str.characters()) 43 , m_string(str) 44 , m_doNotExcludeLineNumbers(true) 45 { 46 } 47 clear()48 void clear() { m_length = 0; m_current = 0; } 49 excludeLineNumbers()50 bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; } doNotExcludeLineNumbers()51 bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; } 52 setExcludeLineNumbers()53 void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; } 54 numberOfCharactersConsumed()55 int numberOfCharactersConsumed() const { return m_string.length() - m_length; } 56 appendTo(String & str)57 void appendTo(String& str) const 58 { 59 if (m_string.characters() == m_current) { 60 if (str.isEmpty()) 61 str = m_string; 62 else 63 str.append(m_string); 64 } else 65 str.append(String(m_current, m_length)); 66 } 67 68 public: 69 int m_length; 70 const UChar* m_current; 71 72 private: 73 String m_string; 74 bool m_doNotExcludeLineNumbers; 75 }; 76 77 class SegmentedString { 78 public: SegmentedString()79 SegmentedString() 80 : m_pushedChar1(0) 81 , m_pushedChar2(0) 82 , m_currentChar(0) 83 , m_numberOfCharactersConsumedPriorToCurrentString(0) 84 , m_numberOfCharactersConsumedPriorToCurrentLine(0) 85 , m_currentLine(0) 86 , m_closed(false) 87 { 88 } 89 SegmentedString(const String & str)90 SegmentedString(const String& str) 91 : m_pushedChar1(0) 92 , m_pushedChar2(0) 93 , m_currentString(str) 94 , m_currentChar(m_currentString.m_current) 95 , m_numberOfCharactersConsumedPriorToCurrentString(0) 96 , m_numberOfCharactersConsumedPriorToCurrentLine(0) 97 , m_currentLine(0) 98 , m_closed(false) 99 { 100 } 101 102 SegmentedString(const SegmentedString&); 103 104 const SegmentedString& operator=(const SegmentedString&); 105 106 void clear(); 107 void close(); 108 109 void append(const SegmentedString&); 110 void prepend(const SegmentedString&); 111 excludeLineNumbers()112 bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); } 113 void setExcludeLineNumbers(); 114 push(UChar c)115 void push(UChar c) 116 { 117 if (!m_pushedChar1) { 118 m_pushedChar1 = c; 119 m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current; 120 } else { 121 ASSERT(!m_pushedChar2); 122 m_pushedChar2 = c; 123 } 124 } 125 isEmpty()126 bool isEmpty() const { return !current(); } 127 unsigned length() const; 128 isClosed()129 bool isClosed() const { return m_closed; } 130 131 enum LookAheadResult { 132 DidNotMatch, 133 DidMatch, 134 NotEnoughCharacters, 135 }; 136 lookAhead(const String & string)137 LookAheadResult lookAhead(const String& string) { return lookAheadInline<SegmentedString::equalsLiterally>(string); } lookAheadIgnoringCase(const String & string)138 LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline<SegmentedString::equalsIgnoringCase>(string); } 139 advance()140 void advance() 141 { 142 if (!m_pushedChar1 && m_currentString.m_length > 1) { 143 --m_currentString.m_length; 144 m_currentChar = ++m_currentString.m_current; 145 return; 146 } 147 advanceSlowCase(); 148 } 149 advanceAndASSERT(UChar expectedCharacter)150 void advanceAndASSERT(UChar expectedCharacter) 151 { 152 ASSERT_UNUSED(expectedCharacter, *current() == expectedCharacter); 153 advance(); 154 } 155 advanceAndASSERTIgnoringCase(UChar expectedCharacter)156 void advanceAndASSERTIgnoringCase(UChar expectedCharacter) 157 { 158 ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(*current()) == WTF::Unicode::foldCase(expectedCharacter)); 159 advance(); 160 } 161 advancePastNewline(int & lineNumber)162 void advancePastNewline(int& lineNumber) 163 { 164 ASSERT(*current() == '\n'); 165 if (!m_pushedChar1 && m_currentString.m_length > 1) { 166 int newLineFlag = m_currentString.doNotExcludeLineNumbers(); 167 lineNumber += newLineFlag; 168 m_currentLine += newLineFlag; 169 if (newLineFlag) 170 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; 171 --m_currentString.m_length; 172 m_currentChar = ++m_currentString.m_current; 173 return; 174 } 175 advanceSlowCase(lineNumber); 176 } 177 advancePastNonNewline()178 void advancePastNonNewline() 179 { 180 ASSERT(*current() != '\n'); 181 if (!m_pushedChar1 && m_currentString.m_length > 1) { 182 --m_currentString.m_length; 183 m_currentChar = ++m_currentString.m_current; 184 return; 185 } 186 advanceSlowCase(); 187 } 188 advance(int & lineNumber)189 void advance(int& lineNumber) 190 { 191 if (!m_pushedChar1 && m_currentString.m_length > 1) { 192 int newLineFlag = (*m_currentString.m_current == '\n') & m_currentString.doNotExcludeLineNumbers(); 193 lineNumber += newLineFlag; 194 m_currentLine += newLineFlag; 195 if (newLineFlag) 196 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; 197 --m_currentString.m_length; 198 m_currentChar = ++m_currentString.m_current; 199 return; 200 } 201 advanceSlowCase(lineNumber); 202 } 203 204 // Writes the consumed characters into consumedCharacters, which must 205 // have space for at least |count| characters. 206 void advance(unsigned count, UChar* consumedCharacters); 207 escaped()208 bool escaped() const { return m_pushedChar1; } 209 numberOfCharactersConsumed()210 int numberOfCharactersConsumed() const 211 { 212 int numberOfPushedCharacters = 0; 213 if (m_pushedChar1) { 214 ++numberOfPushedCharacters; 215 if (m_pushedChar2) 216 ++numberOfPushedCharacters; 217 } 218 return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters; 219 } 220 221 String toString() const; 222 223 const UChar& operator*() const { return *current(); } 224 const UChar* operator->() const { return current(); } 225 226 227 // The method is moderately slow, comparing to currentLine method. 228 WTF::ZeroBasedNumber currentColumn() const; 229 WTF::ZeroBasedNumber currentLine() const; 230 // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog 231 // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed. 232 void setCurrentPosition(WTF::ZeroBasedNumber line, WTF::ZeroBasedNumber columnAftreProlog, int prologLength); 233 234 private: 235 void append(const SegmentedSubstring&); 236 void prepend(const SegmentedSubstring&); 237 238 void advanceSlowCase(); 239 void advanceSlowCase(int& lineNumber); 240 void advanceSubstring(); current()241 const UChar* current() const { return m_currentChar; } 242 equalsLiterally(const UChar * str1,const UChar * str2,size_t count)243 static bool equalsLiterally(const UChar* str1, const UChar* str2, size_t count) { return !memcmp(str1, str2, count * sizeof(UChar)); } equalsIgnoringCase(const UChar * str1,const UChar * str2,size_t count)244 static bool equalsIgnoringCase(const UChar* str1, const UChar* str2, size_t count) { return !WTF::Unicode::umemcasecmp(str1, str2, count); } 245 246 template<bool equals(const UChar* str1, const UChar* str2, size_t count)> lookAheadInline(const String & string)247 inline LookAheadResult lookAheadInline(const String& string) 248 { 249 if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) { 250 if (equals(string.characters(), m_currentString.m_current, string.length())) 251 return DidMatch; 252 return DidNotMatch; 253 } 254 return lookAheadSlowCase<equals>(string); 255 } 256 257 template<bool equals(const UChar* str1, const UChar* str2, size_t count)> lookAheadSlowCase(const String & string)258 LookAheadResult lookAheadSlowCase(const String& string) 259 { 260 unsigned count = string.length(); 261 if (count > length()) 262 return NotEnoughCharacters; 263 UChar* consumedCharacters; 264 String consumedString = String::createUninitialized(count, consumedCharacters); 265 advance(count, consumedCharacters); 266 LookAheadResult result = DidNotMatch; 267 if (equals(string.characters(), consumedCharacters, count)) 268 result = DidMatch; 269 prepend(SegmentedString(consumedString)); 270 return result; 271 } 272 isComposite()273 bool isComposite() const { return !m_substrings.isEmpty(); } 274 275 UChar m_pushedChar1; 276 UChar m_pushedChar2; 277 SegmentedSubstring m_currentString; 278 const UChar* m_currentChar; 279 int m_numberOfCharactersConsumedPriorToCurrentString; 280 int m_numberOfCharactersConsumedPriorToCurrentLine; 281 int m_currentLine; 282 Deque<SegmentedSubstring> m_substrings; 283 bool m_closed; 284 }; 285 286 } 287 288 #endif 289