1 /*
2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "config.h"
32 #include "core/html/track/vtt/VTTParser.h"
33
34 #include "core/dom/Document.h"
35 #include "core/dom/ProcessingInstruction.h"
36 #include "core/dom/Text.h"
37 #include "core/html/track/vtt/VTTElement.h"
38 #include "core/html/track/vtt/VTTScanner.h"
39 #include "platform/RuntimeEnabledFeatures.h"
40 #include "platform/text/SegmentedString.h"
41 #include "wtf/text/WTFString.h"
42
43 namespace blink {
44
45 using namespace HTMLNames;
46
// Conversion factors used when turning a parsed timestamp into seconds.
const double secondsPerHour = 3600.0;
const double secondsPerMinute = 60.0;
const double secondsPerMillisecond = 0.001;
// Number of characters in the "WEBVTT" file signature.
const unsigned fileIdentifierLength = 6u;
51
parseFloatPercentageValue(VTTScanner & valueScanner,float & percentage)52 bool VTTParser::parseFloatPercentageValue(VTTScanner& valueScanner, float& percentage)
53 {
54 float number;
55 if (!valueScanner.scanFloat(number))
56 return false;
57 // '%' must be present and at the end of the setting value.
58 if (!valueScanner.scan('%'))
59 return false;
60 if (number < 0 || number > 100)
61 return false;
62 percentage = number;
63 return true;
64 }
65
parseFloatPercentageValuePair(VTTScanner & valueScanner,char delimiter,FloatPoint & valuePair)66 bool VTTParser::parseFloatPercentageValuePair(VTTScanner& valueScanner, char delimiter, FloatPoint& valuePair)
67 {
68 float firstCoord;
69 if (!parseFloatPercentageValue(valueScanner, firstCoord))
70 return false;
71
72 if (!valueScanner.scan(delimiter))
73 return false;
74
75 float secondCoord;
76 if (!parseFloatPercentageValue(valueScanner, secondCoord))
77 return false;
78
79 valuePair = FloatPoint(firstCoord, secondCoord);
80 return true;
81 }
82
VTTParser(VTTParserClient * client,Document & document)83 VTTParser::VTTParser(VTTParserClient* client, Document& document)
84 : m_document(&document)
85 , m_state(Initial)
86 , m_decoder(TextResourceDecoder::create("text/plain", UTF8Encoding()))
87 , m_currentStartTime(0)
88 , m_currentEndTime(0)
89 , m_client(client)
90 {
91 }
92
getNewCues(WillBeHeapVector<RefPtrWillBeMember<VTTCue>> & outputCues)93 void VTTParser::getNewCues(WillBeHeapVector<RefPtrWillBeMember<VTTCue> >& outputCues)
94 {
95 outputCues = m_cueList;
96 m_cueList.clear();
97 }
98
getNewRegions(WillBeHeapVector<RefPtrWillBeMember<VTTRegion>> & outputRegions)99 void VTTParser::getNewRegions(WillBeHeapVector<RefPtrWillBeMember<VTTRegion> >& outputRegions)
100 {
101 outputRegions = m_regionList;
102 m_regionList.clear();
103 }
104
parseBytes(const char * data,unsigned length)105 void VTTParser::parseBytes(const char* data, unsigned length)
106 {
107 String textData = m_decoder->decode(data, length);
108 m_lineReader.append(textData);
109 parse();
110 }
111
flush()112 void VTTParser::flush()
113 {
114 String textData = m_decoder->flush();
115 m_lineReader.append(textData);
116 m_lineReader.setEndOfStream();
117 parse();
118 flushPendingCue();
119 }
120
parse()121 void VTTParser::parse()
122 {
123 // WebVTT parser algorithm. (5.1 WebVTT file parsing.)
124 // Steps 1 - 3 - Initial setup.
125
126 String line;
127 while (m_lineReader.getLine(line)) {
128 switch (m_state) {
129 case Initial:
130 // Steps 4 - 9 - Check for a valid WebVTT signature.
131 if (!hasRequiredFileIdentifier(line)) {
132 if (m_client)
133 m_client->fileFailedToParse();
134 return;
135 }
136
137 m_state = Header;
138 break;
139
140 case Header:
141 // Steps 10 - 14 - Allow a header (comment area) under the WEBVTT line.
142 collectMetadataHeader(line);
143
144 if (line.isEmpty()) {
145 if (m_client && m_regionList.size())
146 m_client->newRegionsParsed();
147
148 m_state = Id;
149 break;
150 }
151
152 // Step 15 - Break out of header loop if the line could be a timestamp line.
153 if (line.contains("-->"))
154 m_state = recoverCue(line);
155
156 // Step 16 - Line is not the empty string and does not contain "-->".
157 break;
158
159 case Id:
160 // Steps 17 - 20 - Allow any number of line terminators, then initialize new cue values.
161 if (line.isEmpty())
162 break;
163
164 // Step 21 - Cue creation (start a new cue).
165 resetCueValues();
166
167 // Steps 22 - 25 - Check if this line contains an optional identifier or timing data.
168 m_state = collectCueId(line);
169 break;
170
171 case TimingsAndSettings:
172 // Steps 26 - 27 - Discard current cue if the line is empty.
173 if (line.isEmpty()) {
174 m_state = Id;
175 break;
176 }
177
178 // Steps 28 - 29 - Collect cue timings and settings.
179 m_state = collectTimingsAndSettings(line);
180 break;
181
182 case CueText:
183 // Steps 31 - 41 - Collect the cue text, create a cue, and add it to the output.
184 m_state = collectCueText(line);
185 break;
186
187 case BadCue:
188 // Steps 42 - 48 - Discard lines until an empty line or a potential timing line is seen.
189 m_state = ignoreBadCue(line);
190 break;
191 }
192 }
193 }
194
flushPendingCue()195 void VTTParser::flushPendingCue()
196 {
197 ASSERT(m_lineReader.isAtEndOfStream());
198 // If we're in the CueText state when we run out of data, we emit the pending cue.
199 if (m_state == CueText)
200 createNewCue();
201 }
202
hasRequiredFileIdentifier(const String & line)203 bool VTTParser::hasRequiredFileIdentifier(const String& line)
204 {
205 // A WebVTT file identifier consists of an optional BOM character,
206 // the string "WEBVTT" followed by an optional space or tab character,
207 // and any number of characters that are not line terminators ...
208 if (!line.startsWith("WEBVTT", fileIdentifierLength))
209 return false;
210 if (line.length() > fileIdentifierLength && !isASpace(line[fileIdentifierLength]))
211 return false;
212
213 return true;
214 }
215
collectMetadataHeader(const String & line)216 void VTTParser::collectMetadataHeader(const String& line)
217 {
218 // WebVTT header parsing (WebVTT parser algorithm step 12)
219 DEFINE_STATIC_LOCAL(const AtomicString, regionHeaderName, ("Region", AtomicString::ConstructFromLiteral));
220
221 // The only currently supported header is the "Region" header.
222 if (!RuntimeEnabledFeatures::webVTTRegionsEnabled())
223 return;
224
225 // Step 12.4 If line contains the character ":" (A U+003A COLON), then set metadata's
226 // name to the substring of line before the first ":" character and
227 // metadata's value to the substring after this character.
228 size_t colonPosition = line.find(':');
229 if (colonPosition == kNotFound)
230 return;
231
232 String headerName = line.substring(0, colonPosition);
233
234 // Steps 12.5 If metadata's name equals "Region":
235 if (headerName == regionHeaderName) {
236 String headerValue = line.substring(colonPosition + 1);
237 // Steps 12.5.1 - 12.5.11 Region creation: Let region be a new text track region [...]
238 createNewRegion(headerValue);
239 }
240 }
241
collectCueId(const String & line)242 VTTParser::ParseState VTTParser::collectCueId(const String& line)
243 {
244 if (line.contains("-->"))
245 return collectTimingsAndSettings(line);
246 m_currentId = AtomicString(line);
247 return TimingsAndSettings;
248 }
249
collectTimingsAndSettings(const String & line)250 VTTParser::ParseState VTTParser::collectTimingsAndSettings(const String& line)
251 {
252 VTTScanner input(line);
253
254 // Collect WebVTT cue timings and settings. (5.3 WebVTT cue timings and settings parsing.)
255 // Steps 1 - 3 - Let input be the string being parsed and position be a pointer into input.
256 input.skipWhile<isASpace>();
257
258 // Steps 4 - 5 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue start time be the collected time.
259 if (!collectTimeStamp(input, m_currentStartTime))
260 return BadCue;
261 input.skipWhile<isASpace>();
262
263 // Steps 6 - 9 - If the next three characters are not "-->", abort and return failure.
264 if (!input.scan("-->"))
265 return BadCue;
266 input.skipWhile<isASpace>();
267
268 // Steps 10 - 11 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue end time be the collected time.
269 if (!collectTimeStamp(input, m_currentEndTime))
270 return BadCue;
271 input.skipWhile<isASpace>();
272
273 // Step 12 - Parse the WebVTT settings for the cue (conducted in TextTrackCue).
274 m_currentSettings = input.restOfInputAsString();
275 return CueText;
276 }
277
collectCueText(const String & line)278 VTTParser::ParseState VTTParser::collectCueText(const String& line)
279 {
280 // Step 34.
281 if (line.isEmpty()) {
282 createNewCue();
283 return Id;
284 }
285 // Step 35.
286 if (line.contains("-->")) {
287 // Step 39-40.
288 createNewCue();
289
290 // Step 41 - New iteration of the cue loop.
291 return recoverCue(line);
292 }
293 if (!m_currentContent.isEmpty())
294 m_currentContent.append('\n');
295 m_currentContent.append(line);
296
297 return CueText;
298 }
299
recoverCue(const String & line)300 VTTParser::ParseState VTTParser::recoverCue(const String& line)
301 {
302 // Step 17 and 21.
303 resetCueValues();
304
305 // Step 22.
306 return collectTimingsAndSettings(line);
307 }
308
ignoreBadCue(const String & line)309 VTTParser::ParseState VTTParser::ignoreBadCue(const String& line)
310 {
311 if (line.isEmpty())
312 return Id;
313 if (line.contains("-->"))
314 return recoverCue(line);
315 return BadCue;
316 }
317
318 // A helper class for the construction of a "cue fragment" from the cue text.
319 class VTTTreeBuilder {
320 STACK_ALLOCATED();
321 public:
VTTTreeBuilder(Document & document)322 explicit VTTTreeBuilder(Document& document)
323 : m_document(&document) { }
324
325 PassRefPtrWillBeRawPtr<DocumentFragment> buildFromString(const String& cueText);
326
327 private:
328 void constructTreeFromToken(Document&);
document() const329 Document& document() const { return *m_document; }
330
331 VTTToken m_token;
332 RefPtrWillBeMember<ContainerNode> m_currentNode;
333 Vector<AtomicString> m_languageStack;
334 RawPtrWillBeMember<Document> m_document;
335 };
336
buildFromString(const String & cueText)337 PassRefPtrWillBeRawPtr<DocumentFragment> VTTTreeBuilder::buildFromString(const String& cueText)
338 {
339 // Cue text processing based on
340 // 5.4 WebVTT cue text parsing rules, and
341 // 5.5 WebVTT cue text DOM construction rules
342
343 RefPtrWillBeRawPtr<DocumentFragment> fragment = DocumentFragment::create(document());
344
345 if (cueText.isEmpty()) {
346 fragment->parserAppendChild(Text::create(document(), ""));
347 return fragment;
348 }
349
350 m_currentNode = fragment;
351
352 VTTTokenizer tokenizer(cueText);
353 m_languageStack.clear();
354
355 while (tokenizer.nextToken(m_token))
356 constructTreeFromToken(document());
357
358 return fragment.release();
359 }
360
createDocumentFragmentFromCueText(Document & document,const String & cueText)361 PassRefPtrWillBeRawPtr<DocumentFragment> VTTParser::createDocumentFragmentFromCueText(Document& document, const String& cueText)
362 {
363 VTTTreeBuilder treeBuilder(document);
364 return treeBuilder.buildFromString(cueText);
365 }
366
createNewCue()367 void VTTParser::createNewCue()
368 {
369 RefPtrWillBeRawPtr<VTTCue> cue = VTTCue::create(*m_document, m_currentStartTime, m_currentEndTime, m_currentContent.toString());
370 cue->setId(m_currentId);
371 cue->parseSettings(m_currentSettings);
372
373 m_cueList.append(cue);
374 if (m_client)
375 m_client->newCuesParsed();
376 }
377
resetCueValues()378 void VTTParser::resetCueValues()
379 {
380 m_currentId = emptyAtom;
381 m_currentSettings = emptyString();
382 m_currentStartTime = 0;
383 m_currentEndTime = 0;
384 m_currentContent.clear();
385 }
386
createNewRegion(const String & headerValue)387 void VTTParser::createNewRegion(const String& headerValue)
388 {
389 if (headerValue.isEmpty())
390 return;
391
392 // Steps 12.5.1 - 12.5.9 - Construct and initialize a WebVTT Region object.
393 RefPtrWillBeRawPtr<VTTRegion> region = VTTRegion::create();
394 region->setRegionSettings(headerValue);
395
396 // Step 12.5.10 If the text track list of regions regions contains a region
397 // with the same region identifier value as region, remove that region.
398 for (size_t i = 0; i < m_regionList.size(); ++i) {
399 if (m_regionList[i]->id() == region->id()) {
400 m_regionList.remove(i);
401 break;
402 }
403 }
404
405 // Step 12.5.11
406 m_regionList.append(region);
407 }
408
collectTimeStamp(const String & line,double & timeStamp)409 bool VTTParser::collectTimeStamp(const String& line, double& timeStamp)
410 {
411 VTTScanner input(line);
412 return collectTimeStamp(input, timeStamp);
413 }
414
collectTimeStamp(VTTScanner & input,double & timeStamp)415 bool VTTParser::collectTimeStamp(VTTScanner& input, double& timeStamp)
416 {
417 // Collect a WebVTT timestamp (5.3 WebVTT cue timings and settings parsing.)
418 // Steps 1 - 4 - Initial checks, let most significant units be minutes.
419 enum Mode { Minutes, Hours };
420 Mode mode = Minutes;
421
422 // Steps 5 - 7 - Collect a sequence of characters that are 0-9.
423 // If not 2 characters or value is greater than 59, interpret as hours.
424 int value1;
425 unsigned value1Digits = input.scanDigits(value1);
426 if (!value1Digits)
427 return false;
428 if (value1Digits != 2 || value1 > 59)
429 mode = Hours;
430
431 // Steps 8 - 11 - Collect the next sequence of 0-9 after ':' (must be 2 chars).
432 int value2;
433 if (!input.scan(':') || input.scanDigits(value2) != 2)
434 return false;
435
436 // Step 12 - Detect whether this timestamp includes hours.
437 int value3;
438 if (mode == Hours || input.match(':')) {
439 if (!input.scan(':') || input.scanDigits(value3) != 2)
440 return false;
441 } else {
442 value3 = value2;
443 value2 = value1;
444 value1 = 0;
445 }
446
447 // Steps 13 - 17 - Collect next sequence of 0-9 after '.' (must be 3 chars).
448 int value4;
449 if (!input.scan('.') || input.scanDigits(value4) != 3)
450 return false;
451 if (value2 > 59 || value3 > 59)
452 return false;
453
454 // Steps 18 - 19 - Calculate result.
455 timeStamp = (value1 * secondsPerHour) + (value2 * secondsPerMinute) + value3 + (value4 * secondsPerMillisecond);
456 return true;
457 }
458
tokenToNodeType(VTTToken & token)459 static VTTNodeType tokenToNodeType(VTTToken& token)
460 {
461 switch (token.name().length()) {
462 case 1:
463 if (token.name()[0] == 'c')
464 return VTTNodeTypeClass;
465 if (token.name()[0] == 'v')
466 return VTTNodeTypeVoice;
467 if (token.name()[0] == 'b')
468 return VTTNodeTypeBold;
469 if (token.name()[0] == 'i')
470 return VTTNodeTypeItalic;
471 if (token.name()[0] == 'u')
472 return VTTNodeTypeUnderline;
473 break;
474 case 2:
475 if (token.name()[0] == 'r' && token.name()[1] == 't')
476 return VTTNodeTypeRubyText;
477 break;
478 case 4:
479 if (token.name()[0] == 'r' && token.name()[1] == 'u' && token.name()[2] == 'b' && token.name()[3] == 'y')
480 return VTTNodeTypeRuby;
481 if (token.name()[0] == 'l' && token.name()[1] == 'a' && token.name()[2] == 'n' && token.name()[3] == 'g')
482 return VTTNodeTypeLanguage;
483 break;
484 }
485 return VTTNodeTypeNone;
486 }
487
constructTreeFromToken(Document & document)488 void VTTTreeBuilder::constructTreeFromToken(Document& document)
489 {
490 // http://dev.w3.org/html5/webvtt/#webvtt-cue-text-dom-construction-rules
491
492 switch (m_token.type()) {
493 case VTTTokenTypes::Character: {
494 m_currentNode->parserAppendChild(Text::create(document, m_token.characters()));
495 break;
496 }
497 case VTTTokenTypes::StartTag: {
498 VTTNodeType nodeType = tokenToNodeType(m_token);
499 if (nodeType == VTTNodeTypeNone)
500 break;
501
502 VTTNodeType currentType = m_currentNode->isVTTElement() ? toVTTElement(m_currentNode.get())->webVTTNodeType() : VTTNodeTypeNone;
503 // <rt> is only allowed if the current node is <ruby>.
504 if (nodeType == VTTNodeTypeRubyText && currentType != VTTNodeTypeRuby)
505 break;
506
507 RefPtrWillBeRawPtr<VTTElement> child = VTTElement::create(nodeType, &document);
508 if (!m_token.classes().isEmpty())
509 child->setAttribute(classAttr, m_token.classes());
510
511 if (nodeType == VTTNodeTypeVoice) {
512 child->setAttribute(VTTElement::voiceAttributeName(), m_token.annotation());
513 } else if (nodeType == VTTNodeTypeLanguage) {
514 m_languageStack.append(m_token.annotation());
515 child->setAttribute(VTTElement::langAttributeName(), m_languageStack.last());
516 }
517 if (!m_languageStack.isEmpty())
518 child->setLanguage(m_languageStack.last());
519 m_currentNode->parserAppendChild(child);
520 m_currentNode = child;
521 break;
522 }
523 case VTTTokenTypes::EndTag: {
524 VTTNodeType nodeType = tokenToNodeType(m_token);
525 if (nodeType == VTTNodeTypeNone)
526 break;
527
528 // The only non-VTTElement would be the DocumentFragment root. (Text
529 // nodes and PIs will never appear as m_currentNode.)
530 if (!m_currentNode->isVTTElement())
531 break;
532
533 VTTNodeType currentType = toVTTElement(m_currentNode.get())->webVTTNodeType();
534 bool matchesCurrent = nodeType == currentType;
535 if (!matchesCurrent) {
536 // </ruby> auto-closes <rt>.
537 if (currentType == VTTNodeTypeRubyText && nodeType == VTTNodeTypeRuby) {
538 if (m_currentNode->parentNode())
539 m_currentNode = m_currentNode->parentNode();
540 } else {
541 break;
542 }
543 }
544 if (nodeType == VTTNodeTypeLanguage)
545 m_languageStack.removeLast();
546 if (m_currentNode->parentNode())
547 m_currentNode = m_currentNode->parentNode();
548 break;
549 }
550 case VTTTokenTypes::TimestampTag: {
551 String charactersString = m_token.characters();
552 double parsedTimeStamp;
553 if (VTTParser::collectTimeStamp(charactersString, parsedTimeStamp))
554 m_currentNode->parserAppendChild(ProcessingInstruction::create(document, "timestamp", charactersString));
555 break;
556 }
557 default:
558 break;
559 }
560 }
561
trace(Visitor * visitor)562 void VTTParser::trace(Visitor* visitor)
563 {
564 visitor->trace(m_document);
565 visitor->trace(m_cueList);
566 visitor->trace(m_regionList);
567 }
568
569 }
570