• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 Google, Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #ifndef HTMLToken_h
27 #define HTMLToken_h
28 
29 #include "core/dom/Attribute.h"
30 #include "core/html/parser/HTMLToken.h"
31 #include "wtf/PassOwnPtr.h"
32 #include "wtf/RefCounted.h"
33 #include "wtf/RefPtr.h"
34 
35 namespace WebCore {
36 
37 class DoctypeData {
38     WTF_MAKE_NONCOPYABLE(DoctypeData);
39 public:
DoctypeData()40     DoctypeData()
41         : m_hasPublicIdentifier(false)
42         , m_hasSystemIdentifier(false)
43         , m_forceQuirks(false)
44     {
45     }
46 
47     // FIXME: This should use String instead of Vector<UChar>.
48     bool m_hasPublicIdentifier;
49     bool m_hasSystemIdentifier;
50     WTF::Vector<UChar> m_publicIdentifier;
51     WTF::Vector<UChar> m_systemIdentifier;
52     bool m_forceQuirks;
53 };
54 
findAttributeInVector(Vector<Attribute> & attributes,const QualifiedName & name)55 static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes, const QualifiedName& name)
56 {
57     for (unsigned i = 0; i < attributes.size(); ++i) {
58         if (attributes.at(i).name().matches(name))
59             return &attributes.at(i);
60     }
61     return 0;
62 }
63 
64 class HTMLToken {
65     WTF_MAKE_NONCOPYABLE(HTMLToken);
66     WTF_MAKE_FAST_ALLOCATED;
67 public:
68     enum Type {
69         Uninitialized,
70         DOCTYPE,
71         StartTag,
72         EndTag,
73         Comment,
74         Character,
75         EndOfFile,
76     };
77 
78     class Attribute {
79     public:
80         class Range {
81         public:
82             int start;
83             int end;
84         };
85 
86         Range nameRange;
87         Range valueRange;
88         Vector<UChar, 32> name;
89         Vector<UChar, 32> value;
90     };
91 
92     typedef Vector<Attribute, 10> AttributeList;
93 
94     // By using an inline capacity of 256, we avoid spilling over into an malloced buffer
95     // approximately 99% of the time based on a non-scientific browse around a number of
96     // popular web sites on 23 May 2013.
97     typedef Vector<UChar, 256> DataVector;
98 
HTMLToken()99     HTMLToken() { clear(); }
100 
clear()101     void clear()
102     {
103         m_type = Uninitialized;
104         m_range.start = 0;
105         m_range.end = 0;
106         m_baseOffset = 0;
107         m_data.clear();
108         m_orAllData = 0;
109     }
110 
isUninitialized()111     bool isUninitialized() { return m_type == Uninitialized; }
type()112     Type type() const { return m_type; }
113 
makeEndOfFile()114     void makeEndOfFile()
115     {
116         ASSERT(m_type == Uninitialized);
117         m_type = EndOfFile;
118     }
119 
120     /* Range and offset methods exposed for HTMLSourceTracker and HTMLViewSourceParser */
startIndex()121     int startIndex() const { return m_range.start; }
endIndex()122     int endIndex() const { return m_range.end; }
123 
setBaseOffset(int offset)124     void setBaseOffset(int offset)
125     {
126         m_baseOffset = offset;
127     }
128 
end(int endOffset)129     void end(int endOffset)
130     {
131         m_range.end = endOffset - m_baseOffset;
132     }
133 
data()134     const DataVector& data() const
135     {
136         ASSERT(m_type == Character || m_type == Comment || m_type == StartTag || m_type == EndTag);
137         return m_data;
138     }
139 
isAll8BitData()140     bool isAll8BitData() const
141     {
142         return (m_orAllData <= 0xff);
143     }
144 
name()145     const DataVector& name() const
146     {
147         ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
148         return m_data;
149     }
150 
appendToName(UChar character)151     void appendToName(UChar character)
152     {
153         ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
154         ASSERT(character);
155         m_data.append(character);
156         m_orAllData |= character;
157     }
158 
159     /* DOCTYPE Tokens */
160 
forceQuirks()161     bool forceQuirks() const
162     {
163         ASSERT(m_type == DOCTYPE);
164         return m_doctypeData->m_forceQuirks;
165     }
166 
setForceQuirks()167     void setForceQuirks()
168     {
169         ASSERT(m_type == DOCTYPE);
170         m_doctypeData->m_forceQuirks = true;
171     }
172 
beginDOCTYPE()173     void beginDOCTYPE()
174     {
175         ASSERT(m_type == Uninitialized);
176         m_type = DOCTYPE;
177         m_doctypeData = adoptPtr(new DoctypeData);
178     }
179 
beginDOCTYPE(UChar character)180     void beginDOCTYPE(UChar character)
181     {
182         ASSERT(character);
183         beginDOCTYPE();
184         m_data.append(character);
185         m_orAllData |= character;
186     }
187 
188     // FIXME: Distinguish between a missing public identifer and an empty one.
publicIdentifier()189     const WTF::Vector<UChar>& publicIdentifier() const
190     {
191         ASSERT(m_type == DOCTYPE);
192         return m_doctypeData->m_publicIdentifier;
193     }
194 
195     // FIXME: Distinguish between a missing system identifer and an empty one.
systemIdentifier()196     const WTF::Vector<UChar>& systemIdentifier() const
197     {
198         ASSERT(m_type == DOCTYPE);
199         return m_doctypeData->m_systemIdentifier;
200     }
201 
setPublicIdentifierToEmptyString()202     void setPublicIdentifierToEmptyString()
203     {
204         ASSERT(m_type == DOCTYPE);
205         m_doctypeData->m_hasPublicIdentifier = true;
206         m_doctypeData->m_publicIdentifier.clear();
207     }
208 
setSystemIdentifierToEmptyString()209     void setSystemIdentifierToEmptyString()
210     {
211         ASSERT(m_type == DOCTYPE);
212         m_doctypeData->m_hasSystemIdentifier = true;
213         m_doctypeData->m_systemIdentifier.clear();
214     }
215 
appendToPublicIdentifier(UChar character)216     void appendToPublicIdentifier(UChar character)
217     {
218         ASSERT(character);
219         ASSERT(m_type == DOCTYPE);
220         ASSERT(m_doctypeData->m_hasPublicIdentifier);
221         m_doctypeData->m_publicIdentifier.append(character);
222     }
223 
appendToSystemIdentifier(UChar character)224     void appendToSystemIdentifier(UChar character)
225     {
226         ASSERT(character);
227         ASSERT(m_type == DOCTYPE);
228         ASSERT(m_doctypeData->m_hasSystemIdentifier);
229         m_doctypeData->m_systemIdentifier.append(character);
230     }
231 
releaseDoctypeData()232     PassOwnPtr<DoctypeData> releaseDoctypeData()
233     {
234         return m_doctypeData.release();
235     }
236 
237     /* Start/End Tag Tokens */
238 
selfClosing()239     bool selfClosing() const
240     {
241         ASSERT(m_type == StartTag || m_type == EndTag);
242         return m_selfClosing;
243     }
244 
setSelfClosing()245     void setSelfClosing()
246     {
247         ASSERT(m_type == StartTag || m_type == EndTag);
248         m_selfClosing = true;
249     }
250 
beginStartTag(UChar character)251     void beginStartTag(UChar character)
252     {
253         ASSERT(character);
254         ASSERT(m_type == Uninitialized);
255         m_type = StartTag;
256         m_selfClosing = false;
257         m_currentAttribute = 0;
258         m_attributes.clear();
259 
260         m_data.append(character);
261         m_orAllData |= character;
262     }
263 
beginEndTag(LChar character)264     void beginEndTag(LChar character)
265     {
266         ASSERT(m_type == Uninitialized);
267         m_type = EndTag;
268         m_selfClosing = false;
269         m_currentAttribute = 0;
270         m_attributes.clear();
271 
272         m_data.append(character);
273     }
274 
beginEndTag(const Vector<LChar,32> & characters)275     void beginEndTag(const Vector<LChar, 32>& characters)
276     {
277         ASSERT(m_type == Uninitialized);
278         m_type = EndTag;
279         m_selfClosing = false;
280         m_currentAttribute = 0;
281         m_attributes.clear();
282 
283         m_data.appendVector(characters);
284     }
285 
addNewAttribute()286     void addNewAttribute()
287     {
288         ASSERT(m_type == StartTag || m_type == EndTag);
289         m_attributes.grow(m_attributes.size() + 1);
290         m_currentAttribute = &m_attributes.last();
291 #ifndef NDEBUG
292         m_currentAttribute->nameRange.start = 0;
293         m_currentAttribute->nameRange.end = 0;
294         m_currentAttribute->valueRange.start = 0;
295         m_currentAttribute->valueRange.end = 0;
296 #endif
297     }
298 
beginAttributeName(int offset)299     void beginAttributeName(int offset)
300     {
301         m_currentAttribute->nameRange.start = offset - m_baseOffset;
302     }
303 
endAttributeName(int offset)304     void endAttributeName(int offset)
305     {
306         int index = offset - m_baseOffset;
307         m_currentAttribute->nameRange.end = index;
308         m_currentAttribute->valueRange.start = index;
309         m_currentAttribute->valueRange.end = index;
310     }
311 
beginAttributeValue(int offset)312     void beginAttributeValue(int offset)
313     {
314         m_currentAttribute->valueRange.start = offset - m_baseOffset;
315 #ifndef NDEBUG
316         m_currentAttribute->valueRange.end = 0;
317 #endif
318     }
319 
endAttributeValue(int offset)320     void endAttributeValue(int offset)
321     {
322         m_currentAttribute->valueRange.end = offset - m_baseOffset;
323     }
324 
appendToAttributeName(UChar character)325     void appendToAttributeName(UChar character)
326     {
327         ASSERT(character);
328         ASSERT(m_type == StartTag || m_type == EndTag);
329         // FIXME: We should be able to add the following ASSERT once we fix
330         // https://bugs.webkit.org/show_bug.cgi?id=62971
331         //   ASSERT(m_currentAttribute->nameRange.start);
332         m_currentAttribute->name.append(character);
333     }
334 
appendToAttributeValue(UChar character)335     void appendToAttributeValue(UChar character)
336     {
337         ASSERT(character);
338         ASSERT(m_type == StartTag || m_type == EndTag);
339         ASSERT(m_currentAttribute->valueRange.start);
340         m_currentAttribute->value.append(character);
341     }
342 
appendToAttributeValue(size_t i,const String & value)343     void appendToAttributeValue(size_t i, const String& value)
344     {
345         ASSERT(!value.isEmpty());
346         ASSERT(m_type == StartTag || m_type == EndTag);
347         append(m_attributes[i].value, value);
348     }
349 
attributes()350     const AttributeList& attributes() const
351     {
352         ASSERT(m_type == StartTag || m_type == EndTag);
353         return m_attributes;
354     }
355 
getAttributeItem(const QualifiedName & name)356     const Attribute* getAttributeItem(const QualifiedName& name) const
357     {
358         for (unsigned i = 0; i < m_attributes.size(); ++i) {
359             if (AtomicString(m_attributes.at(i).name) == name.localName())
360                 return &m_attributes.at(i);
361         }
362         return 0;
363     }
364 
365     // Used by the XSSAuditor to nuke XSS-laden attributes.
eraseValueOfAttribute(size_t i)366     void eraseValueOfAttribute(size_t i)
367     {
368         ASSERT(m_type == StartTag || m_type == EndTag);
369         m_attributes[i].value.clear();
370     }
371 
372     /* Character Tokens */
373 
374     // Starting a character token works slightly differently than starting
375     // other types of tokens because we want to save a per-character branch.
ensureIsCharacterToken()376     void ensureIsCharacterToken()
377     {
378         ASSERT(m_type == Uninitialized || m_type == Character);
379         m_type = Character;
380     }
381 
characters()382     const DataVector& characters() const
383     {
384         ASSERT(m_type == Character);
385         return m_data;
386     }
387 
appendToCharacter(char character)388     void appendToCharacter(char character)
389     {
390         ASSERT(m_type == Character);
391         m_data.append(character);
392     }
393 
appendToCharacter(UChar character)394     void appendToCharacter(UChar character)
395     {
396         ASSERT(m_type == Character);
397         m_data.append(character);
398         m_orAllData |= character;
399     }
400 
appendToCharacter(const Vector<LChar,32> & characters)401     void appendToCharacter(const Vector<LChar, 32>& characters)
402     {
403         ASSERT(m_type == Character);
404         m_data.appendVector(characters);
405     }
406 
407     /* Comment Tokens */
408 
comment()409     const DataVector& comment() const
410     {
411         ASSERT(m_type == Comment);
412         return m_data;
413     }
414 
beginComment()415     void beginComment()
416     {
417         ASSERT(m_type == Uninitialized);
418         m_type = Comment;
419     }
420 
appendToComment(UChar character)421     void appendToComment(UChar character)
422     {
423         ASSERT(character);
424         ASSERT(m_type == Comment);
425         m_data.append(character);
426         m_orAllData |= character;
427     }
428 
eraseCharacters()429     void eraseCharacters()
430     {
431         ASSERT(m_type == Character);
432         m_data.clear();
433         m_orAllData = 0;
434     }
435 
436 private:
437     Type m_type;
438     Attribute::Range m_range; // Always starts at zero.
439     int m_baseOffset;
440     DataVector m_data;
441     UChar m_orAllData;
442 
443     // For StartTag and EndTag
444     bool m_selfClosing;
445     AttributeList m_attributes;
446 
447     // A pointer into m_attributes used during lexing.
448     Attribute* m_currentAttribute;
449 
450     // For DOCTYPE
451     OwnPtr<DoctypeData> m_doctypeData;
452 };
453 
454 }
455 
456 #endif
457