1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google Inc. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "config.h"
29 #include "core/html/parser/HTMLPreloadScanner.h"
30
31 #include "core/HTMLNames.h"
32 #include "core/InputTypeNames.h"
33 #include "core/css/MediaList.h"
34 #include "core/css/MediaQueryEvaluator.h"
35 #include "core/css/MediaValues.h"
36 #include "core/css/parser/SizesAttributeParser.h"
37 #include "core/html/LinkRelAttribute.h"
38 #include "core/html/parser/HTMLParserIdioms.h"
39 #include "core/html/parser/HTMLSrcsetParser.h"
40 #include "core/html/parser/HTMLTokenizer.h"
41 #include "platform/RuntimeEnabledFeatures.h"
42 #include "platform/TraceEvent.h"
43 #include "wtf/MainThread.h"
44
45 namespace blink {
46
47 using namespace HTMLNames;
48
match(const StringImpl * impl,const QualifiedName & qName)49 static bool match(const StringImpl* impl, const QualifiedName& qName)
50 {
51 return impl == qName.localName().impl();
52 }
53
match(const AtomicString & name,const QualifiedName & qName)54 static bool match(const AtomicString& name, const QualifiedName& qName)
55 {
56 ASSERT(isMainThread());
57 return qName.localName() == name;
58 }
59
match(const String & name,const QualifiedName & qName)60 static bool match(const String& name, const QualifiedName& qName)
61 {
62 return threadSafeMatch(name, qName);
63 }
64
tagImplFor(const HTMLToken::DataVector & data)65 static const StringImpl* tagImplFor(const HTMLToken::DataVector& data)
66 {
67 AtomicString tagName(data);
68 const StringImpl* result = tagName.impl();
69 if (result->isStatic())
70 return result;
71 return 0;
72 }
73
tagImplFor(const String & tagName)74 static const StringImpl* tagImplFor(const String& tagName)
75 {
76 const StringImpl* result = tagName.impl();
77 if (result->isStatic())
78 return result;
79 return 0;
80 }
81
initiatorFor(const StringImpl * tagImpl)82 static String initiatorFor(const StringImpl* tagImpl)
83 {
84 ASSERT(tagImpl);
85 if (match(tagImpl, imgTag))
86 return imgTag.localName();
87 if (match(tagImpl, inputTag))
88 return inputTag.localName();
89 if (match(tagImpl, linkTag))
90 return linkTag.localName();
91 if (match(tagImpl, scriptTag))
92 return scriptTag.localName();
93 ASSERT_NOT_REACHED();
94 return emptyString();
95 }
96
mediaAttributeMatches(const MediaValues & mediaValues,const String & attributeValue)97 static bool mediaAttributeMatches(const MediaValues& mediaValues, const String& attributeValue)
98 {
99 RefPtrWillBeRawPtr<MediaQuerySet> mediaQueries = MediaQuerySet::createOffMainThread(attributeValue);
100 MediaQueryEvaluator mediaQueryEvaluator(mediaValues);
101 return mediaQueryEvaluator.eval(mediaQueries.get());
102 }
103
104 class TokenPreloadScanner::StartTagScanner {
105 public:
StartTagScanner(const StringImpl * tagImpl,PassRefPtr<MediaValues> mediaValues)106 StartTagScanner(const StringImpl* tagImpl, PassRefPtr<MediaValues> mediaValues)
107 : m_tagImpl(tagImpl)
108 , m_linkIsStyleSheet(false)
109 , m_matchedMediaAttribute(true)
110 , m_inputIsImage(false)
111 , m_sourceSize(0)
112 , m_sourceSizeSet(false)
113 , m_isCORSEnabled(false)
114 , m_defer(FetchRequest::NoDefer)
115 , m_allowCredentials(DoNotAllowStoredCredentials)
116 , m_mediaValues(mediaValues)
117 {
118 if (match(m_tagImpl, imgTag)
119 || match(m_tagImpl, sourceTag)) {
120 if (RuntimeEnabledFeatures::pictureSizesEnabled())
121 m_sourceSize = SizesAttributeParser(m_mediaValues, String()).length();
122 return;
123 }
124 if ( !match(m_tagImpl, inputTag)
125 && !match(m_tagImpl, linkTag)
126 && !match(m_tagImpl, scriptTag))
127 m_tagImpl = 0;
128 }
129
130 enum URLReplacement {
131 AllowURLReplacement,
132 DisallowURLReplacement
133 };
134
processAttributes(const HTMLToken::AttributeList & attributes)135 void processAttributes(const HTMLToken::AttributeList& attributes)
136 {
137 ASSERT(isMainThread());
138 if (!m_tagImpl)
139 return;
140 for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
141 AtomicString attributeName(iter->name);
142 String attributeValue = StringImpl::create8BitIfPossible(iter->value);
143 processAttribute(attributeName, attributeValue);
144 }
145 }
146
processAttributes(const Vector<CompactHTMLToken::Attribute> & attributes)147 void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes)
148 {
149 if (!m_tagImpl)
150 return;
151 for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter)
152 processAttribute(iter->name, iter->value);
153 }
154
handlePictureSourceURL(String & sourceURL)155 void handlePictureSourceURL(String& sourceURL)
156 {
157 if (match(m_tagImpl, sourceTag) && m_matchedMediaAttribute && sourceURL.isEmpty())
158 sourceURL = m_srcsetImageCandidate.toString();
159 else if (match(m_tagImpl, imgTag) && !sourceURL.isEmpty())
160 setUrlToLoad(sourceURL, AllowURLReplacement);
161 }
162
createPreloadRequest(const KURL & predictedBaseURL,const SegmentedString & source)163 PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source)
164 {
165 if (!shouldPreload() || !m_matchedMediaAttribute)
166 return nullptr;
167
168 TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii());
169 TextPosition position = TextPosition(source.currentLine(), source.currentColumn());
170 OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType());
171 if (isCORSEnabled())
172 request->setCrossOriginEnabled(allowStoredCredentials());
173 request->setCharset(charset());
174 request->setDefer(m_defer);
175 return request.release();
176 }
177
178 private:
179 template<typename NameType>
processScriptAttribute(const NameType & attributeName,const String & attributeValue)180 void processScriptAttribute(const NameType& attributeName, const String& attributeValue)
181 {
182 // FIXME - Don't set crossorigin multiple times.
183 if (match(attributeName, srcAttr))
184 setUrlToLoad(attributeValue, DisallowURLReplacement);
185 else if (match(attributeName, crossoriginAttr))
186 setCrossOriginAllowed(attributeValue);
187 else if (match(attributeName, asyncAttr))
188 setDefer(FetchRequest::LazyLoad);
189 else if (match(attributeName, deferAttr))
190 setDefer(FetchRequest::LazyLoad);
191 }
192
193 template<typename NameType>
processImgAttribute(const NameType & attributeName,const String & attributeValue)194 void processImgAttribute(const NameType& attributeName, const String& attributeValue)
195 {
196 if (match(attributeName, srcAttr) && m_imgSrcUrl.isNull()) {
197 m_imgSrcUrl = attributeValue;
198 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue, m_srcsetImageCandidate), AllowURLReplacement);
199 } else if (match(attributeName, crossoriginAttr)) {
200 setCrossOriginAllowed(attributeValue);
201 } else if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
202 m_srcsetAttributeValue = attributeValue;
203 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue);
204 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
205 } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && match(attributeName, sizesAttr) && !m_sourceSizeSet) {
206 m_sourceSize = SizesAttributeParser(m_mediaValues, attributeValue).length();
207 m_sourceSizeSet = true;
208 if (!m_srcsetImageCandidate.isEmpty()) {
209 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue);
210 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
211 }
212 }
213 }
214
215 template<typename NameType>
processLinkAttribute(const NameType & attributeName,const String & attributeValue)216 void processLinkAttribute(const NameType& attributeName, const String& attributeValue)
217 {
218 // FIXME - Don't set rel/media/crossorigin multiple times.
219 if (match(attributeName, hrefAttr))
220 setUrlToLoad(attributeValue, DisallowURLReplacement);
221 else if (match(attributeName, relAttr))
222 m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
223 else if (match(attributeName, mediaAttr))
224 m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
225 else if (match(attributeName, crossoriginAttr))
226 setCrossOriginAllowed(attributeValue);
227 }
228
229 template<typename NameType>
processInputAttribute(const NameType & attributeName,const String & attributeValue)230 void processInputAttribute(const NameType& attributeName, const String& attributeValue)
231 {
232 // FIXME - Don't set type multiple times.
233 if (match(attributeName, srcAttr))
234 setUrlToLoad(attributeValue, DisallowURLReplacement);
235 else if (match(attributeName, typeAttr))
236 m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image);
237 }
238
239 template<typename NameType>
processSourceAttribute(const NameType & attributeName,const String & attributeValue)240 void processSourceAttribute(const NameType& attributeName, const String& attributeValue)
241 {
242 if (!RuntimeEnabledFeatures::pictureEnabled())
243 return;
244 if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
245 m_srcsetAttributeValue = attributeValue;
246 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue);
247 } else if (match(attributeName, sizesAttr) && !m_sourceSizeSet) {
248 m_sourceSize = SizesAttributeParser(m_mediaValues, attributeValue).length();
249 m_sourceSizeSet = true;
250 if (!m_srcsetImageCandidate.isEmpty()) {
251 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue);
252 }
253 } else if (match(attributeName, mediaAttr)) {
254 // FIXME - Don't match media multiple times.
255 m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
256 }
257
258 }
259
260 template<typename NameType>
processAttribute(const NameType & attributeName,const String & attributeValue)261 void processAttribute(const NameType& attributeName, const String& attributeValue)
262 {
263 if (match(attributeName, charsetAttr))
264 m_charset = attributeValue;
265
266 if (match(m_tagImpl, scriptTag))
267 processScriptAttribute(attributeName, attributeValue);
268 else if (match(m_tagImpl, imgTag))
269 processImgAttribute(attributeName, attributeValue);
270 else if (match(m_tagImpl, linkTag))
271 processLinkAttribute(attributeName, attributeValue);
272 else if (match(m_tagImpl, inputTag))
273 processInputAttribute(attributeName, attributeValue);
274 else if (match(m_tagImpl, sourceTag))
275 processSourceAttribute(attributeName, attributeValue);
276 }
277
relAttributeIsStyleSheet(const String & attributeValue)278 static bool relAttributeIsStyleSheet(const String& attributeValue)
279 {
280 LinkRelAttribute rel(attributeValue);
281 return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch();
282 }
283
setUrlToLoad(const String & value,URLReplacement replacement)284 void setUrlToLoad(const String& value, URLReplacement replacement)
285 {
286 // We only respect the first src/href, per HTML5:
287 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
288 if (replacement == DisallowURLReplacement && !m_urlToLoad.isEmpty())
289 return;
290 String url = stripLeadingAndTrailingHTMLSpaces(value);
291 if (url.isEmpty())
292 return;
293 m_urlToLoad = url;
294 }
295
charset() const296 const String& charset() const
297 {
298 // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway.
299 if (match(m_tagImpl, imgTag))
300 return emptyString();
301 return m_charset;
302 }
303
resourceType() const304 Resource::Type resourceType() const
305 {
306 if (match(m_tagImpl, scriptTag))
307 return Resource::Script;
308 if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage))
309 return Resource::Image;
310 if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet)
311 return Resource::CSSStyleSheet;
312 ASSERT_NOT_REACHED();
313 return Resource::Raw;
314 }
315
shouldPreload() const316 bool shouldPreload() const
317 {
318 if (m_urlToLoad.isEmpty())
319 return false;
320 if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet)
321 return false;
322 if (match(m_tagImpl, inputTag) && !m_inputIsImage)
323 return false;
324 return true;
325 }
326
isCORSEnabled() const327 bool isCORSEnabled() const
328 {
329 return m_isCORSEnabled;
330 }
331
allowStoredCredentials() const332 StoredCredentials allowStoredCredentials() const
333 {
334 return m_allowCredentials;
335 }
336
setCrossOriginAllowed(const String & corsSetting)337 void setCrossOriginAllowed(const String& corsSetting)
338 {
339 m_isCORSEnabled = true;
340 if (!corsSetting.isNull() && equalIgnoringCase(stripLeadingAndTrailingHTMLSpaces(corsSetting), "use-credentials"))
341 m_allowCredentials = AllowStoredCredentials;
342 else
343 m_allowCredentials = DoNotAllowStoredCredentials;
344 }
345
setDefer(FetchRequest::DeferOption defer)346 void setDefer(FetchRequest::DeferOption defer)
347 {
348 m_defer = defer;
349 }
350
defer() const351 bool defer() const
352 {
353 return m_defer;
354 }
355
356 const StringImpl* m_tagImpl;
357 String m_urlToLoad;
358 ImageCandidate m_srcsetImageCandidate;
359 String m_charset;
360 bool m_linkIsStyleSheet;
361 bool m_matchedMediaAttribute;
362 bool m_inputIsImage;
363 String m_imgSrcUrl;
364 String m_srcsetAttributeValue;
365 unsigned m_sourceSize;
366 bool m_sourceSizeSet;
367 bool m_isCORSEnabled;
368 FetchRequest::DeferOption m_defer;
369 StoredCredentials m_allowCredentials;
370 RefPtr<MediaValues> m_mediaValues;
371 };
372
TokenPreloadScanner(const KURL & documentURL,PassRefPtr<MediaValues> mediaValues)373 TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
374 : m_documentURL(documentURL)
375 , m_inStyle(false)
376 , m_inPicture(false)
377 , m_templateCount(0)
378 , m_mediaValues(mediaValues)
379 {
380 }
381
~TokenPreloadScanner()382 TokenPreloadScanner::~TokenPreloadScanner()
383 {
384 }
385
createCheckpoint()386 TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint()
387 {
388 TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size();
389 m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount));
390 return checkpoint;
391 }
392
rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)393 void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)
394 {
395 ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
396 const Checkpoint& checkpoint = m_checkpoints[checkpointIndex];
397 m_predictedBaseElementURL = checkpoint.predictedBaseElementURL;
398 m_inStyle = checkpoint.inStyle;
399 m_templateCount = checkpoint.templateCount;
400 m_cssScanner.reset();
401 m_checkpoints.clear();
402 }
403
scan(const HTMLToken & token,const SegmentedString & source,PreloadRequestStream & requests)404 void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
405 {
406 scanCommon(token, source, requests);
407 }
408
scan(const CompactHTMLToken & token,const SegmentedString & source,PreloadRequestStream & requests)409 void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
410 {
411 scanCommon(token, source, requests);
412 }
413
414 template<typename Token>
scanCommon(const Token & token,const SegmentedString & source,PreloadRequestStream & requests)415 void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests)
416 {
417 switch (token.type()) {
418 case HTMLToken::Character: {
419 if (!m_inStyle)
420 return;
421 m_cssScanner.scan(token.data(), source, requests);
422 return;
423 }
424 case HTMLToken::EndTag: {
425 const StringImpl* tagImpl = tagImplFor(token.data());
426 if (match(tagImpl, templateTag)) {
427 if (m_templateCount)
428 --m_templateCount;
429 return;
430 }
431 if (match(tagImpl, styleTag)) {
432 if (m_inStyle)
433 m_cssScanner.reset();
434 m_inStyle = false;
435 return;
436 }
437 if (match(tagImpl, pictureTag))
438 m_inPicture = false;
439 return;
440 }
441 case HTMLToken::StartTag: {
442 if (m_templateCount)
443 return;
444 const StringImpl* tagImpl = tagImplFor(token.data());
445 if (match(tagImpl, templateTag)) {
446 ++m_templateCount;
447 return;
448 }
449 if (match(tagImpl, styleTag)) {
450 m_inStyle = true;
451 return;
452 }
453 if (match(tagImpl, baseTag)) {
454 // The first <base> element is the one that wins.
455 if (!m_predictedBaseElementURL.isEmpty())
456 return;
457 updatePredictedBaseURL(token);
458 return;
459 }
460 if (RuntimeEnabledFeatures::pictureEnabled() && (match(tagImpl, pictureTag))) {
461 m_inPicture = true;
462 m_pictureSourceURL = String();
463 return;
464 }
465
466 StartTagScanner scanner(tagImpl, m_mediaValues);
467 scanner.processAttributes(token.attributes());
468 if (m_inPicture)
469 scanner.handlePictureSourceURL(m_pictureSourceURL);
470 OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source);
471 if (request)
472 requests.append(request.release());
473 return;
474 }
475 default: {
476 return;
477 }
478 }
479 }
480
481 template<typename Token>
updatePredictedBaseURL(const Token & token)482 void TokenPreloadScanner::updatePredictedBaseURL(const Token& token)
483 {
484 ASSERT(m_predictedBaseElementURL.isEmpty());
485 if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr))
486 m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy();
487 }
488
HTMLPreloadScanner(const HTMLParserOptions & options,const KURL & documentURL,PassRefPtr<MediaValues> mediaValues)489 HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
490 : m_scanner(documentURL, mediaValues)
491 , m_tokenizer(HTMLTokenizer::create(options))
492 {
493 }
494
~HTMLPreloadScanner()495 HTMLPreloadScanner::~HTMLPreloadScanner()
496 {
497 }
498
appendToEnd(const SegmentedString & source)499 void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
500 {
501 m_source.append(source);
502 }
503
scan(HTMLResourcePreloader * preloader,const KURL & startingBaseElementURL)504 void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL)
505 {
506 ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
507
508 TRACE_EVENT1("blink", "HTMLPreloadScanner::scan", "source_length", m_source.length());
509
510 // When we start scanning, our best prediction of the baseElementURL is the real one!
511 if (!startingBaseElementURL.isEmpty())
512 m_scanner.setPredictedBaseElementURL(startingBaseElementURL);
513
514 PreloadRequestStream requests;
515
516 while (m_tokenizer->nextToken(m_source, m_token)) {
517 if (m_token.type() == HTMLToken::StartTag)
518 m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit));
519 m_scanner.scan(m_token, m_source, requests);
520 m_token.clear();
521 }
522
523 preloader->takeAndPreload(requests);
524 }
525
526 }
527