• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 Google, Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #ifndef HTMLToken_h
27 #define HTMLToken_h
28 
29 #include "core/dom/Attribute.h"
30 #include "wtf/PassOwnPtr.h"
31 #include "wtf/RefCounted.h"
32 #include "wtf/RefPtr.h"
33 
34 namespace WebCore {
35 
36 class DoctypeData {
37     WTF_MAKE_NONCOPYABLE(DoctypeData);
38 public:
DoctypeData()39     DoctypeData()
40         : m_hasPublicIdentifier(false)
41         , m_hasSystemIdentifier(false)
42         , m_forceQuirks(false)
43     {
44     }
45 
46     // FIXME: This should use String instead of Vector<UChar>.
47     bool m_hasPublicIdentifier;
48     bool m_hasSystemIdentifier;
49     WTF::Vector<UChar> m_publicIdentifier;
50     WTF::Vector<UChar> m_systemIdentifier;
51     bool m_forceQuirks;
52 };
53 
findAttributeInVector(Vector<Attribute> & attributes,const QualifiedName & name)54 static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes, const QualifiedName& name)
55 {
56     for (unsigned i = 0; i < attributes.size(); ++i) {
57         if (attributes.at(i).name().matches(name))
58             return &attributes.at(i);
59     }
60     return 0;
61 }
62 
63 class HTMLToken {
64     WTF_MAKE_NONCOPYABLE(HTMLToken);
65     WTF_MAKE_FAST_ALLOCATED;
66 public:
67     enum Type {
68         Uninitialized,
69         DOCTYPE,
70         StartTag,
71         EndTag,
72         Comment,
73         Character,
74         EndOfFile,
75     };
76 
77     class Attribute {
78     public:
79         class Range {
80         public:
81             int start;
82             int end;
83         };
84 
85         Range nameRange;
86         Range valueRange;
87         Vector<UChar, 32> name;
88         Vector<UChar, 32> value;
89     };
90 
91     typedef Vector<Attribute, 10> AttributeList;
92 
93     // By using an inline capacity of 256, we avoid spilling over into an malloced buffer
94     // approximately 99% of the time based on a non-scientific browse around a number of
95     // popular web sites on 23 May 2013.
96     typedef Vector<UChar, 256> DataVector;
97 
HTMLToken()98     HTMLToken() { clear(); }
99 
clear()100     void clear()
101     {
102         m_type = Uninitialized;
103         m_range.start = 0;
104         m_range.end = 0;
105         m_baseOffset = 0;
106         // Don't call Vector::clear() as that would destroy the
107         // alloced VectorBuffer. If the innerHTML'd content has
108         // two 257 character text nodes in a row, we'll needlessly
109         // thrash malloc. When we finally finish the parse the
110         // HTMLToken will be destroyed and the VectorBuffer released.
111         m_data.shrink(0);
112         m_orAllData = 0;
113     }
114 
isUninitialized()115     bool isUninitialized() { return m_type == Uninitialized; }
type()116     Type type() const { return m_type; }
117 
makeEndOfFile()118     void makeEndOfFile()
119     {
120         ASSERT(m_type == Uninitialized);
121         m_type = EndOfFile;
122     }
123 
124     /* Range and offset methods exposed for HTMLSourceTracker and HTMLViewSourceParser */
startIndex()125     int startIndex() const { return m_range.start; }
endIndex()126     int endIndex() const { return m_range.end; }
127 
setBaseOffset(int offset)128     void setBaseOffset(int offset)
129     {
130         m_baseOffset = offset;
131     }
132 
end(int endOffset)133     void end(int endOffset)
134     {
135         m_range.end = endOffset - m_baseOffset;
136     }
137 
data()138     const DataVector& data() const
139     {
140         ASSERT(m_type == Character || m_type == Comment || m_type == StartTag || m_type == EndTag);
141         return m_data;
142     }
143 
isAll8BitData()144     bool isAll8BitData() const
145     {
146         return (m_orAllData <= 0xff);
147     }
148 
name()149     const DataVector& name() const
150     {
151         ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
152         return m_data;
153     }
154 
appendToName(UChar character)155     void appendToName(UChar character)
156     {
157         ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
158         ASSERT(character);
159         m_data.append(character);
160         m_orAllData |= character;
161     }
162 
163     /* DOCTYPE Tokens */
164 
forceQuirks()165     bool forceQuirks() const
166     {
167         ASSERT(m_type == DOCTYPE);
168         return m_doctypeData->m_forceQuirks;
169     }
170 
setForceQuirks()171     void setForceQuirks()
172     {
173         ASSERT(m_type == DOCTYPE);
174         m_doctypeData->m_forceQuirks = true;
175     }
176 
beginDOCTYPE()177     void beginDOCTYPE()
178     {
179         ASSERT(m_type == Uninitialized);
180         m_type = DOCTYPE;
181         m_doctypeData = adoptPtr(new DoctypeData);
182     }
183 
beginDOCTYPE(UChar character)184     void beginDOCTYPE(UChar character)
185     {
186         ASSERT(character);
187         beginDOCTYPE();
188         m_data.append(character);
189         m_orAllData |= character;
190     }
191 
192     // FIXME: Distinguish between a missing public identifer and an empty one.
publicIdentifier()193     const WTF::Vector<UChar>& publicIdentifier() const
194     {
195         ASSERT(m_type == DOCTYPE);
196         return m_doctypeData->m_publicIdentifier;
197     }
198 
199     // FIXME: Distinguish between a missing system identifer and an empty one.
systemIdentifier()200     const WTF::Vector<UChar>& systemIdentifier() const
201     {
202         ASSERT(m_type == DOCTYPE);
203         return m_doctypeData->m_systemIdentifier;
204     }
205 
setPublicIdentifierToEmptyString()206     void setPublicIdentifierToEmptyString()
207     {
208         ASSERT(m_type == DOCTYPE);
209         m_doctypeData->m_hasPublicIdentifier = true;
210         m_doctypeData->m_publicIdentifier.clear();
211     }
212 
setSystemIdentifierToEmptyString()213     void setSystemIdentifierToEmptyString()
214     {
215         ASSERT(m_type == DOCTYPE);
216         m_doctypeData->m_hasSystemIdentifier = true;
217         m_doctypeData->m_systemIdentifier.clear();
218     }
219 
appendToPublicIdentifier(UChar character)220     void appendToPublicIdentifier(UChar character)
221     {
222         ASSERT(character);
223         ASSERT(m_type == DOCTYPE);
224         ASSERT(m_doctypeData->m_hasPublicIdentifier);
225         m_doctypeData->m_publicIdentifier.append(character);
226     }
227 
appendToSystemIdentifier(UChar character)228     void appendToSystemIdentifier(UChar character)
229     {
230         ASSERT(character);
231         ASSERT(m_type == DOCTYPE);
232         ASSERT(m_doctypeData->m_hasSystemIdentifier);
233         m_doctypeData->m_systemIdentifier.append(character);
234     }
235 
releaseDoctypeData()236     PassOwnPtr<DoctypeData> releaseDoctypeData()
237     {
238         return m_doctypeData.release();
239     }
240 
241     /* Start/End Tag Tokens */
242 
selfClosing()243     bool selfClosing() const
244     {
245         ASSERT(m_type == StartTag || m_type == EndTag);
246         return m_selfClosing;
247     }
248 
setSelfClosing()249     void setSelfClosing()
250     {
251         ASSERT(m_type == StartTag || m_type == EndTag);
252         m_selfClosing = true;
253     }
254 
beginStartTag(UChar character)255     void beginStartTag(UChar character)
256     {
257         ASSERT(character);
258         ASSERT(m_type == Uninitialized);
259         m_type = StartTag;
260         m_selfClosing = false;
261         m_currentAttribute = 0;
262         m_attributes.clear();
263 
264         m_data.append(character);
265         m_orAllData |= character;
266     }
267 
beginEndTag(LChar character)268     void beginEndTag(LChar character)
269     {
270         ASSERT(m_type == Uninitialized);
271         m_type = EndTag;
272         m_selfClosing = false;
273         m_currentAttribute = 0;
274         m_attributes.clear();
275 
276         m_data.append(character);
277     }
278 
beginEndTag(const Vector<LChar,32> & characters)279     void beginEndTag(const Vector<LChar, 32>& characters)
280     {
281         ASSERT(m_type == Uninitialized);
282         m_type = EndTag;
283         m_selfClosing = false;
284         m_currentAttribute = 0;
285         m_attributes.clear();
286 
287         m_data.appendVector(characters);
288     }
289 
addNewAttribute()290     void addNewAttribute()
291     {
292         ASSERT(m_type == StartTag || m_type == EndTag);
293         m_attributes.grow(m_attributes.size() + 1);
294         m_currentAttribute = &m_attributes.last();
295 #ifndef NDEBUG
296         m_currentAttribute->nameRange.start = 0;
297         m_currentAttribute->nameRange.end = 0;
298         m_currentAttribute->valueRange.start = 0;
299         m_currentAttribute->valueRange.end = 0;
300 #endif
301     }
302 
beginAttributeName(int offset)303     void beginAttributeName(int offset)
304     {
305         m_currentAttribute->nameRange.start = offset - m_baseOffset;
306     }
307 
endAttributeName(int offset)308     void endAttributeName(int offset)
309     {
310         int index = offset - m_baseOffset;
311         m_currentAttribute->nameRange.end = index;
312         m_currentAttribute->valueRange.start = index;
313         m_currentAttribute->valueRange.end = index;
314     }
315 
beginAttributeValue(int offset)316     void beginAttributeValue(int offset)
317     {
318         m_currentAttribute->valueRange.start = offset - m_baseOffset;
319 #ifndef NDEBUG
320         m_currentAttribute->valueRange.end = 0;
321 #endif
322     }
323 
endAttributeValue(int offset)324     void endAttributeValue(int offset)
325     {
326         m_currentAttribute->valueRange.end = offset - m_baseOffset;
327     }
328 
appendToAttributeName(UChar character)329     void appendToAttributeName(UChar character)
330     {
331         ASSERT(character);
332         ASSERT(m_type == StartTag || m_type == EndTag);
333         ASSERT(m_currentAttribute->nameRange.start);
334         m_currentAttribute->name.append(character);
335     }
336 
appendToAttributeValue(UChar character)337     void appendToAttributeValue(UChar character)
338     {
339         ASSERT(character);
340         ASSERT(m_type == StartTag || m_type == EndTag);
341         ASSERT(m_currentAttribute->valueRange.start);
342         m_currentAttribute->value.append(character);
343     }
344 
appendToAttributeValue(size_t i,const String & value)345     void appendToAttributeValue(size_t i, const String& value)
346     {
347         ASSERT(!value.isEmpty());
348         ASSERT(m_type == StartTag || m_type == EndTag);
349         append(m_attributes[i].value, value);
350     }
351 
attributes()352     const AttributeList& attributes() const
353     {
354         ASSERT(m_type == StartTag || m_type == EndTag);
355         return m_attributes;
356     }
357 
getAttributeItem(const QualifiedName & name)358     const Attribute* getAttributeItem(const QualifiedName& name) const
359     {
360         for (unsigned i = 0; i < m_attributes.size(); ++i) {
361             if (AtomicString(m_attributes.at(i).name) == name.localName())
362                 return &m_attributes.at(i);
363         }
364         return 0;
365     }
366 
367     // Used by the XSSAuditor to nuke XSS-laden attributes.
eraseValueOfAttribute(size_t i)368     void eraseValueOfAttribute(size_t i)
369     {
370         ASSERT(m_type == StartTag || m_type == EndTag);
371         m_attributes[i].value.clear();
372     }
373 
374     /* Character Tokens */
375 
376     // Starting a character token works slightly differently than starting
377     // other types of tokens because we want to save a per-character branch.
ensureIsCharacterToken()378     void ensureIsCharacterToken()
379     {
380         ASSERT(m_type == Uninitialized || m_type == Character);
381         m_type = Character;
382     }
383 
characters()384     const DataVector& characters() const
385     {
386         ASSERT(m_type == Character);
387         return m_data;
388     }
389 
appendToCharacter(char character)390     void appendToCharacter(char character)
391     {
392         ASSERT(m_type == Character);
393         m_data.append(character);
394     }
395 
appendToCharacter(UChar character)396     void appendToCharacter(UChar character)
397     {
398         ASSERT(m_type == Character);
399         m_data.append(character);
400         m_orAllData |= character;
401     }
402 
appendToCharacter(const Vector<LChar,32> & characters)403     void appendToCharacter(const Vector<LChar, 32>& characters)
404     {
405         ASSERT(m_type == Character);
406         m_data.appendVector(characters);
407     }
408 
409     /* Comment Tokens */
410 
comment()411     const DataVector& comment() const
412     {
413         ASSERT(m_type == Comment);
414         return m_data;
415     }
416 
beginComment()417     void beginComment()
418     {
419         ASSERT(m_type == Uninitialized);
420         m_type = Comment;
421     }
422 
appendToComment(UChar character)423     void appendToComment(UChar character)
424     {
425         ASSERT(character);
426         ASSERT(m_type == Comment);
427         m_data.append(character);
428         m_orAllData |= character;
429     }
430 
431     // Only for XSSAuditor
eraseCharacters()432     void eraseCharacters()
433     {
434         ASSERT(m_type == Character);
435         m_data.clear();
436         m_orAllData = 0;
437     }
438 
439 private:
440     Type m_type;
441     Attribute::Range m_range; // Always starts at zero.
442     int m_baseOffset;
443     DataVector m_data;
444     UChar m_orAllData;
445 
446     // For StartTag and EndTag
447     bool m_selfClosing;
448     AttributeList m_attributes;
449 
450     // A pointer into m_attributes used during lexing.
451     Attribute* m_currentAttribute;
452 
453     // For DOCTYPE
454     OwnPtr<DoctypeData> m_doctypeData;
455 };
456 
457 }
458 
459 #endif
460