• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1.  Redistributions of source code must retain the above copyright
8  *     notice, this list of conditions and the following disclaimer.
9  * 2.  Redistributions in binary form must reproduce the above copyright
10  *     notice, this list of conditions and the following disclaimer in the
11  *     documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16  * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
17  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
20  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  */
24 
25 #include "config.h"
26 #include "core/html/parser/HTMLParserIdioms.h"
27 
28 #include <limits>
29 #include "wtf/MathExtras.h"
30 #include "wtf/text/AtomicString.h"
31 #include "wtf/text/StringBuilder.h"
32 #include "wtf/text/StringHash.h"
33 
34 namespace WebCore {
35 
36 template <typename CharType>
stripLeadingAndTrailingHTMLSpaces(String string,const CharType * characters,unsigned length)37 static String stripLeadingAndTrailingHTMLSpaces(String string, const CharType* characters, unsigned length)
38 {
39     unsigned numLeadingSpaces = 0;
40     unsigned numTrailingSpaces = 0;
41 
42     for (; numLeadingSpaces < length; ++numLeadingSpaces) {
43         if (isNotHTMLSpace<CharType>(characters[numLeadingSpaces]))
44             break;
45     }
46 
47     if (numLeadingSpaces == length)
48         return string.isNull() ? string : emptyAtom.string();
49 
50     for (; numTrailingSpaces < length; ++numTrailingSpaces) {
51         if (isNotHTMLSpace<CharType>(characters[length - numTrailingSpaces - 1]))
52             break;
53     }
54 
55     ASSERT(numLeadingSpaces + numTrailingSpaces < length);
56 
57     if (!(numLeadingSpaces | numTrailingSpaces))
58         return string;
59 
60     return string.substring(numLeadingSpaces, length - (numLeadingSpaces + numTrailingSpaces));
61 }
62 
// Removes HTML space characters from both ends of |string|.
// A null string stays null; any other zero-length string yields the empty
// string. Non-empty input is forwarded to the character-width specific helper.
String stripLeadingAndTrailingHTMLSpaces(const String& string)
{
    const unsigned length = string.length();

    if (length) {
        // Pick the helper instantiation matching the string's storage width.
        return string.is8Bit()
            ? stripLeadingAndTrailingHTMLSpaces<LChar>(string, string.characters8(), length)
            : stripLeadingAndTrailingHTMLSpaces<UChar>(string, string.characters16(), length);
    }

    return string.isNull() ? string : emptyAtom.string();
}
75 
serializeForNumberType(const Decimal & number)76 String serializeForNumberType(const Decimal& number)
77 {
78     if (number.isZero()) {
79         // Decimal::toString appends exponent, e.g. "0e-18"
80         return number.isNegative() ? "-0" : "0";
81     }
82     return number.toString();
83 }
84 
serializeForNumberType(double number)85 String serializeForNumberType(double number)
86 {
87     // According to HTML5, "the best representation of the number n as a floating
88     // point number" is a string produced by applying ToString() to n.
89     return String::numberToStringECMAScript(number);
90 }
91 
// Parses |string| as an HTML5 real number (see HTML5 2.5.4.3 `Real numbers.'
// and parseToDoubleForNumberType) and returns it as a Decimal.
// Returns |fallbackValue| when the text does not start with '-', '.' or an
// ASCII digit, when the parsed value is not finite, or when it lies outside
// the single-precision float range. A zero result is always returned as +0.
Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbackValue)
{
    // A leading '+' or whitespace is not valid here, so reject anything that
    // does not begin with '-', '.' or a digit before parsing.
    const UChar lead = string[0];
    const bool startsLikeNumber = lead == '-' || lead == '.' || isASCIIDigit(lead);
    if (!startsLikeNumber)
        return fallbackValue;

    const Decimal parsed = Decimal::fromString(string);
    if (!parsed.isFinite())
        return fallbackValue;

    // Numbers are considered finite IEEE 754 single-precision floating point values.
    // See HTML5 2.5.4.3 `Real numbers.'
    // FIXME: We should use numeric_limits<double>::max for number input type.
    const Decimal limit = Decimal::fromDouble(std::numeric_limits<float>::max());
    const bool withinFloatRange = !(parsed < -limit) && !(parsed > limit);
    if (!withinFloatRange)
        return fallbackValue;

    // Normalize -0 to +0.
    if (parsed.isZero())
        return Decimal(0);
    return parsed;
}
115 
parseToDoubleForNumberType(const String & string,double fallbackValue)116 double parseToDoubleForNumberType(const String& string, double fallbackValue)
117 {
118     // See HTML5 2.5.4.3 `Real numbers.'
119 
120     // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
121     UChar firstCharacter = string[0];
122     if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
123         return fallbackValue;
124 
125     bool valid = false;
126     double value = string.toDouble(&valid);
127     if (!valid)
128         return fallbackValue;
129 
130     // NaN and infinity are considered valid by String::toDouble, but not valid here.
131     if (!std::isfinite(value))
132         return fallbackValue;
133 
134     // Numbers are considered finite IEEE 754 single-precision floating point values.
135     // See HTML5 2.5.4.3 `Real numbers.'
136     if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max())
137         return fallbackValue;
138 
139     // The following expression converts -0 to +0.
140     return value ? value : 0;
141 }
142 
143 template <typename CharacterType>
parseHTMLIntegerInternal(const CharacterType * position,const CharacterType * end,int & value)144 static bool parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end, int& value)
145 {
146     // Step 3
147     int sign = 1;
148 
149     // Step 4
150     while (position < end) {
151         if (!isHTMLSpace<CharacterType>(*position))
152             break;
153         ++position;
154     }
155 
156     // Step 5
157     if (position == end)
158         return false;
159     ASSERT(position < end);
160 
161     // Step 6
162     if (*position == '-') {
163         sign = -1;
164         ++position;
165     } else if (*position == '+')
166         ++position;
167     if (position == end)
168         return false;
169     ASSERT(position < end);
170 
171     // Step 7
172     if (!isASCIIDigit(*position))
173         return false;
174 
175     // Step 8
176     StringBuilder digits;
177     while (position < end) {
178         if (!isASCIIDigit(*position))
179             break;
180         digits.append(*position++);
181     }
182 
183     // Step 9
184     bool ok;
185     if (digits.is8Bit())
186         value = sign * charactersToIntStrict(digits.characters8(), digits.length(), &ok);
187     else
188         value = sign * charactersToIntStrict(digits.characters16(), digits.length(), &ok);
189     return ok;
190 }
191 
192 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
parseHTMLInteger(const String & input,int & value)193 bool parseHTMLInteger(const String& input, int& value)
194 {
195     // Step 1
196     // Step 2
197     unsigned length = input.length();
198     if (!length || input.is8Bit()) {
199         const LChar* start = input.characters8();
200         return parseHTMLIntegerInternal(start, start + length, value);
201     }
202 
203     const UChar* start = input.characters16();
204     return parseHTMLIntegerInternal(start, start + length, value);
205 }
206 
207 template <typename CharacterType>
parseHTMLNonNegativeIntegerInternal(const CharacterType * position,const CharacterType * end,unsigned & value)208 static bool parseHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end, unsigned& value)
209 {
210     // Step 3
211     while (position < end) {
212         if (!isHTMLSpace<CharacterType>(*position))
213             break;
214         ++position;
215     }
216 
217     // Step 4
218     if (position == end)
219         return false;
220     ASSERT(position < end);
221 
222     // Step 5
223     if (*position == '+')
224         ++position;
225 
226     // Step 6
227     if (position == end)
228         return false;
229     ASSERT(position < end);
230 
231     // Step 7
232     if (!isASCIIDigit(*position))
233         return false;
234 
235     // Step 8
236     StringBuilder digits;
237     while (position < end) {
238         if (!isASCIIDigit(*position))
239             break;
240         digits.append(*position++);
241     }
242 
243     // Step 9
244     bool ok;
245     if (digits.is8Bit())
246         value = charactersToUIntStrict(digits.characters8(), digits.length(), &ok);
247     else
248         value = charactersToUIntStrict(digits.characters16(), digits.length(), &ok);
249     return ok;
250 }
251 
252 
253 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers
parseHTMLNonNegativeInteger(const String & input,unsigned & value)254 bool parseHTMLNonNegativeInteger(const String& input, unsigned& value)
255 {
256     // Step 1
257     // Step 2
258     unsigned length = input.length();
259     if (length && input.is8Bit()) {
260         const LChar* start = input.characters8();
261         return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
262     }
263 
264     const UChar* start = input.characters16();
265     return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
266 }
267 
threadSafeEqual(const StringImpl * a,const StringImpl * b)268 static bool threadSafeEqual(const StringImpl* a, const StringImpl* b)
269 {
270     if (a == b)
271         return true;
272     if (a->hash() != b->hash())
273         return false;
274     return equalNonNull(a, b);
275 }
276 
threadSafeMatch(const QualifiedName & a,const QualifiedName & b)277 bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b)
278 {
279     return threadSafeEqual(a.localName().impl(), b.localName().impl());
280 }
281 
threadSafeMatch(const String & localName,const QualifiedName & qName)282 bool threadSafeMatch(const String& localName, const QualifiedName& qName)
283 {
284     return threadSafeEqual(localName.impl(), qName.localName().impl());
285 }
286 
findStringIfStatic(const UChar * characters,unsigned length)287 StringImpl* findStringIfStatic(const UChar* characters, unsigned length)
288 {
289     // We don't need to try hashing if we know the string is too long.
290     if (length > StringImpl::highestStaticStringLength())
291         return 0;
292     // computeHashAndMaskTop8Bits is the function StringImpl::hash() uses.
293     unsigned hash = StringHasher::computeHashAndMaskTop8Bits(characters, length);
294     const WTF::StaticStringsTable& table = StringImpl::allStaticStrings();
295     ASSERT(!table.isEmpty());
296 
297     WTF::StaticStringsTable::const_iterator it = table.find(hash);
298     if (it == table.end())
299         return 0;
300     // It's possible to have hash collisions between arbitrary strings and
301     // known identifiers (e.g. "bvvfg" collides with "script").
302     // However ASSERTs in StringImpl::createStatic guard against there ever being collisions
303     // between static strings.
304     if (!equal(it->value, characters, length))
305         return 0;
306     return it->value;
307 }
308 
309 }
310