1 /*
2 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef AtomicHTMLToken_h
27 #define AtomicHTMLToken_h
28
29 #include "core/HTMLElementLookupTrie.h"
30 #include "core/dom/Attribute.h"
31 #include "core/html/parser/CompactHTMLToken.h"
32 #include "core/html/parser/HTMLToken.h"
33 #include "wtf/RefCounted.h"
34 #include "wtf/RefPtr.h"
35
36 namespace WebCore {
37
38 class AtomicHTMLToken {
39 WTF_MAKE_NONCOPYABLE(AtomicHTMLToken);
40 public:
41
forceQuirks()42 bool forceQuirks() const
43 {
44 ASSERT(m_type == HTMLToken::DOCTYPE);
45 return m_doctypeData->m_forceQuirks;
46 }
47
type()48 HTMLToken::Type type() const { return m_type; }
49
name()50 const AtomicString& name() const
51 {
52 ASSERT(usesName());
53 return m_name;
54 }
55
setName(const AtomicString & name)56 void setName(const AtomicString& name)
57 {
58 ASSERT(usesName());
59 m_name = name;
60 }
61
selfClosing()62 bool selfClosing() const
63 {
64 ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag);
65 return m_selfClosing;
66 }
67
getAttributeItem(const QualifiedName & attributeName)68 Attribute* getAttributeItem(const QualifiedName& attributeName)
69 {
70 ASSERT(usesAttributes());
71 return findAttributeInVector(m_attributes, attributeName);
72 }
73
attributes()74 Vector<Attribute>& attributes()
75 {
76 ASSERT(usesAttributes());
77 return m_attributes;
78 }
79
attributes()80 const Vector<Attribute>& attributes() const
81 {
82 ASSERT(usesAttributes());
83 return m_attributes;
84 }
85
characters()86 const String& characters() const
87 {
88 ASSERT(m_type == HTMLToken::Character);
89 return m_data;
90 }
91
comment()92 const String& comment() const
93 {
94 ASSERT(m_type == HTMLToken::Comment);
95 return m_data;
96 }
97
98 // FIXME: Distinguish between a missing public identifer and an empty one.
publicIdentifier()99 Vector<UChar>& publicIdentifier() const
100 {
101 ASSERT(m_type == HTMLToken::DOCTYPE);
102 return m_doctypeData->m_publicIdentifier;
103 }
104
105 // FIXME: Distinguish between a missing system identifer and an empty one.
systemIdentifier()106 Vector<UChar>& systemIdentifier() const
107 {
108 ASSERT(m_type == HTMLToken::DOCTYPE);
109 return m_doctypeData->m_systemIdentifier;
110 }
111
AtomicHTMLToken(HTMLToken & token)112 explicit AtomicHTMLToken(HTMLToken& token)
113 : m_type(token.type())
114 {
115 switch (m_type) {
116 case HTMLToken::Uninitialized:
117 ASSERT_NOT_REACHED();
118 break;
119 case HTMLToken::DOCTYPE:
120 m_name = AtomicString(token.name());
121 m_doctypeData = token.releaseDoctypeData();
122 break;
123 case HTMLToken::EndOfFile:
124 break;
125 case HTMLToken::StartTag:
126 case HTMLToken::EndTag: {
127 m_selfClosing = token.selfClosing();
128 if (StringImpl* tagName = lookupHTMLTag(token.name().data(), token.name().size()))
129 m_name = AtomicString(tagName);
130 else
131 m_name = AtomicString(token.name());
132 initializeAttributes(token.attributes());
133 break;
134 }
135 case HTMLToken::Character:
136 case HTMLToken::Comment:
137 if (token.isAll8BitData())
138 m_data = String::make8BitFrom16BitSource(token.data());
139 else
140 m_data = String(token.data());
141 break;
142 }
143 }
144
AtomicHTMLToken(const CompactHTMLToken & token)145 explicit AtomicHTMLToken(const CompactHTMLToken& token)
146 : m_type(token.type())
147 {
148 switch (m_type) {
149 case HTMLToken::Uninitialized:
150 ASSERT_NOT_REACHED();
151 break;
152 case HTMLToken::DOCTYPE:
153 m_name = AtomicString(token.data());
154 m_doctypeData = adoptPtr(new DoctypeData());
155 m_doctypeData->m_hasPublicIdentifier = true;
156 append(m_doctypeData->m_publicIdentifier, token.publicIdentifier());
157 m_doctypeData->m_hasSystemIdentifier = true;
158 append(m_doctypeData->m_systemIdentifier, token.systemIdentifier());
159 m_doctypeData->m_forceQuirks = token.doctypeForcesQuirks();
160 break;
161 case HTMLToken::EndOfFile:
162 break;
163 case HTMLToken::StartTag:
164 m_attributes.reserveInitialCapacity(token.attributes().size());
165 for (Vector<CompactHTMLToken::Attribute>::const_iterator it = token.attributes().begin(); it != token.attributes().end(); ++it) {
166 QualifiedName name(nullAtom, AtomicString(it->name), nullAtom);
167 // FIXME: This is N^2 for the number of attributes.
168 if (!findAttributeInVector(m_attributes, name))
169 m_attributes.append(Attribute(name, AtomicString(it->value)));
170 }
171 // Fall through!
172 case HTMLToken::EndTag:
173 m_selfClosing = token.selfClosing();
174 m_name = AtomicString(token.data());
175 break;
176 case HTMLToken::Character:
177 case HTMLToken::Comment:
178 m_data = token.data();
179 break;
180 }
181 }
182
AtomicHTMLToken(HTMLToken::Type type)183 explicit AtomicHTMLToken(HTMLToken::Type type)
184 : m_type(type)
185 , m_selfClosing(false)
186 {
187 }
188
189 AtomicHTMLToken(HTMLToken::Type type, const AtomicString& name, const Vector<Attribute>& attributes = Vector<Attribute>())
m_type(type)190 : m_type(type)
191 , m_name(name)
192 , m_selfClosing(false)
193 , m_attributes(attributes)
194 {
195 ASSERT(usesName());
196 }
197
198 private:
199 HTMLToken::Type m_type;
200
201 void initializeAttributes(const HTMLToken::AttributeList& attributes);
202 QualifiedName nameForAttribute(const HTMLToken::Attribute&) const;
203
204 bool usesName() const;
205
206 bool usesAttributes() const;
207
208 // "name" for DOCTYPE, StartTag, and EndTag
209 AtomicString m_name;
210
211 // "data" for Comment, "characters" for Character
212 String m_data;
213
214 // For DOCTYPE
215 OwnPtr<DoctypeData> m_doctypeData;
216
217 // For StartTag and EndTag
218 bool m_selfClosing;
219
220 Vector<Attribute> m_attributes;
221 };
222
initializeAttributes(const HTMLToken::AttributeList & attributes)223 inline void AtomicHTMLToken::initializeAttributes(const HTMLToken::AttributeList& attributes)
224 {
225 size_t size = attributes.size();
226 if (!size)
227 return;
228
229 m_attributes.clear();
230 m_attributes.reserveInitialCapacity(size);
231 for (size_t i = 0; i < size; ++i) {
232 const HTMLToken::Attribute& attribute = attributes[i];
233 if (attribute.name.isEmpty())
234 continue;
235
236 ASSERT(attribute.nameRange.start);
237 ASSERT(attribute.nameRange.end);
238 ASSERT(attribute.valueRange.start);
239 ASSERT(attribute.valueRange.end);
240
241 AtomicString value(attribute.value);
242 const QualifiedName& name = nameForAttribute(attribute);
243 // FIXME: This is N^2 for the number of attributes.
244 if (!findAttributeInVector(m_attributes, name))
245 m_attributes.append(Attribute(name, value));
246 }
247 }
248
249 }
250
251 #endif
252