1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_DATEPARSER_H_ 6 #define V8_DATEPARSER_H_ 7 8 #include "src/allocation.h" 9 #include "src/char-predicates.h" 10 #include "src/parsing/scanner.h" 11 12 namespace v8 { 13 namespace internal { 14 15 class DateParser : public AllStatic { 16 public: 17 // Parse the string as a date. If parsing succeeds, return true after 18 // filling out the output array as follows (all integers are Smis): 19 // [0]: year 20 // [1]: month (0 = Jan, 1 = Feb, ...) 21 // [2]: day 22 // [3]: hour 23 // [4]: minute 24 // [5]: second 25 // [6]: millisecond 26 // [7]: UTC offset in seconds, or null value if no timezone specified 27 // If parsing fails, return false (content of output array is not defined). 28 template <typename Char> 29 static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache); 30 31 enum { 32 YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE 33 }; 34 35 private: 36 // Range testing Between(int x,int lo,int hi)37 static inline bool Between(int x, int lo, int hi) { 38 return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo); 39 } 40 41 // Indicates a missing value. 42 static const int kNone = kMaxInt; 43 44 // Maximal number of digits used to build the value of a numeral. 45 // Remaining digits are ignored. 46 static const int kMaxSignificantDigits = 9; 47 48 // InputReader provides basic string parsing and character classification. 49 template <typename Char> 50 class InputReader BASE_EMBEDDED { 51 public: InputReader(UnicodeCache * unicode_cache,Vector<Char> s)52 InputReader(UnicodeCache* unicode_cache, Vector<Char> s) 53 : index_(0), 54 buffer_(s), 55 unicode_cache_(unicode_cache) { 56 Next(); 57 } 58 position()59 int position() { return index_; } 60 61 // Advance to the next character of the string. Next()62 void Next() { 63 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0; 64 index_++; 65 } 66 67 // Read a string of digits as an unsigned number. Cap value at 68 // kMaxSignificantDigits, but skip remaining digits if the numeral 69 // is longer. ReadUnsignedNumeral()70 int ReadUnsignedNumeral() { 71 int n = 0; 72 int i = 0; 73 while (IsAsciiDigit()) { 74 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0'; 75 i++; 76 Next(); 77 } 78 return n; 79 } 80 81 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a 82 // lower-case prefix, and pad any remainder of the buffer with zeroes. 83 // Return word length. ReadWord(uint32_t * prefix,int prefix_size)84 int ReadWord(uint32_t* prefix, int prefix_size) { 85 int len; 86 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) { 87 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_); 88 } 89 for (int i = len; i < prefix_size; i++) prefix[i] = 0; 90 return len; 91 } 92 93 // The skip methods return whether they actually skipped something. Skip(uint32_t c)94 bool Skip(uint32_t c) { 95 if (ch_ == c) { 96 Next(); 97 return true; 98 } 99 return false; 100 } 101 102 inline bool SkipWhiteSpace(); 103 inline bool SkipParentheses(); 104 105 // Character testing/classification. Non-ASCII digits are not supported. Is(uint32_t c)106 bool Is(uint32_t c) const { return ch_ == c; } IsEnd()107 bool IsEnd() const { return ch_ == 0; } IsAsciiDigit()108 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); } IsAsciiAlphaOrAbove()109 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; } IsAsciiSign()110 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; } 111 112 // Return 1 for '+' and -1 for '-'. GetAsciiSignValue()113 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } 114 115 private: 116 int index_; 117 Vector<Char> buffer_; 118 uint32_t ch_; 119 UnicodeCache* unicode_cache_; 120 }; 121 122 enum KeywordType { 123 INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM 124 }; 125 126 struct DateToken { 127 public: IsInvalidDateToken128 bool IsInvalid() { return tag_ == kInvalidTokenTag; } IsUnknownDateToken129 bool IsUnknown() { return tag_ == kUnknownTokenTag; } IsNumberDateToken130 bool IsNumber() { return tag_ == kNumberTag; } IsSymbolDateToken131 bool IsSymbol() { return tag_ == kSymbolTag; } IsWhiteSpaceDateToken132 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; } IsEndOfInputDateToken133 bool IsEndOfInput() { return tag_ == kEndOfInputTag; } IsKeywordDateToken134 bool IsKeyword() { return tag_ >= kKeywordTagStart; } 135 lengthDateToken136 int length() { return length_; } 137 numberDateToken138 int number() { 139 DCHECK(IsNumber()); 140 return value_; 141 } keyword_typeDateToken142 KeywordType keyword_type() { 143 DCHECK(IsKeyword()); 144 return static_cast<KeywordType>(tag_); 145 } keyword_valueDateToken146 int keyword_value() { 147 DCHECK(IsKeyword()); 148 return value_; 149 } symbolDateToken150 char symbol() { 151 DCHECK(IsSymbol()); 152 return static_cast<char>(value_); 153 } IsSymbolDateToken154 bool IsSymbol(char symbol) { 155 return IsSymbol() && this->symbol() == symbol; 156 } IsKeywordTypeDateToken157 bool IsKeywordType(KeywordType tag) { 158 return tag_ == tag; 159 } IsFixedLengthNumberDateToken160 bool IsFixedLengthNumber(int length) { 161 return IsNumber() && length_ == length; 162 } IsAsciiSignDateToken163 bool IsAsciiSign() { 164 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+'); 165 } ascii_signDateToken166 int ascii_sign() { 167 DCHECK(IsAsciiSign()); 168 return 44 - value_; 169 } IsKeywordZDateToken170 bool IsKeywordZ() { 171 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0; 172 } IsUnknownDateToken173 bool IsUnknown(int character) { 174 return IsUnknown() && value_ == character; 175 } 176 // Factory functions. KeywordDateToken177 static DateToken Keyword(KeywordType tag, int value, int length) { 178 return DateToken(tag, length, value); 179 } NumberDateToken180 static DateToken Number(int value, int length) { 181 return DateToken(kNumberTag, length, value); 182 } SymbolDateToken183 static DateToken Symbol(char symbol) { 184 return DateToken(kSymbolTag, 1, symbol); 185 } EndOfInputDateToken186 static DateToken EndOfInput() { 187 return DateToken(kEndOfInputTag, 0, -1); 188 } WhiteSpaceDateToken189 static DateToken WhiteSpace(int length) { 190 return DateToken(kWhiteSpaceTag, length, -1); 191 } UnknownDateToken192 static DateToken Unknown() { 193 return DateToken(kUnknownTokenTag, 1, -1); 194 } InvalidDateToken195 static DateToken Invalid() { 196 return DateToken(kInvalidTokenTag, 0, -1); 197 } 198 199 private: 200 enum TagType { 201 kInvalidTokenTag = -6, 202 kUnknownTokenTag = -5, 203 kWhiteSpaceTag = -4, 204 kNumberTag = -3, 205 kSymbolTag = -2, 206 kEndOfInputTag = -1, 207 kKeywordTagStart = 0 208 }; DateTokenDateToken209 DateToken(int tag, int length, int value) 210 : tag_(tag), 211 length_(length), 212 value_(value) { } 213 214 int tag_; 215 int length_; // Number of characters. 216 int value_; 217 }; 218 219 template <typename Char> 220 class DateStringTokenizer { 221 public: DateStringTokenizer(InputReader<Char> * in)222 explicit DateStringTokenizer(InputReader<Char>* in) 223 : in_(in), next_(Scan()) { } Next()224 DateToken Next() { 225 DateToken result = next_; 226 next_ = Scan(); 227 return result; 228 } 229 Peek()230 DateToken Peek() { 231 return next_; 232 } SkipSymbol(char symbol)233 bool SkipSymbol(char symbol) { 234 if (next_.IsSymbol(symbol)) { 235 next_ = Scan(); 236 return true; 237 } 238 return false; 239 } 240 241 private: 242 DateToken Scan(); 243 244 InputReader<Char>* in_; 245 DateToken next_; 246 }; 247 248 static int ReadMilliseconds(DateToken number); 249 250 // KeywordTable maps names of months, time zones, am/pm to numbers. 251 class KeywordTable : public AllStatic { 252 public: 253 // Look up a word in the keyword table and return an index. 254 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength 255 // and 'len' is the word length. 256 static int Lookup(const uint32_t* pre, int len); 257 // Get the type of the keyword at index i. GetType(int i)258 static KeywordType GetType(int i) { 259 return static_cast<KeywordType>(array[i][kTypeOffset]); 260 } 261 // Get the value of the keyword at index i. GetValue(int i)262 static int GetValue(int i) { return array[i][kValueOffset]; } 263 264 static const int kPrefixLength = 3; 265 static const int kTypeOffset = kPrefixLength; 266 static const int kValueOffset = kTypeOffset + 1; 267 static const int kEntrySize = kValueOffset + 1; 268 static const int8_t array[][kEntrySize]; 269 }; 270 271 class TimeZoneComposer BASE_EMBEDDED { 272 public: TimeZoneComposer()273 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {} Set(int offset_in_hours)274 void Set(int offset_in_hours) { 275 sign_ = offset_in_hours < 0 ? -1 : 1; 276 hour_ = offset_in_hours * sign_; 277 minute_ = 0; 278 } SetSign(int sign)279 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; } SetAbsoluteHour(int hour)280 void SetAbsoluteHour(int hour) { hour_ = hour; } SetAbsoluteMinute(int minute)281 void SetAbsoluteMinute(int minute) { minute_ = minute; } IsExpecting(int n)282 bool IsExpecting(int n) const { 283 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n); 284 } IsUTC()285 bool IsUTC() const { return hour_ == 0 && minute_ == 0; } 286 bool Write(FixedArray* output); IsEmpty()287 bool IsEmpty() { return hour_ == kNone; } 288 private: 289 int sign_; 290 int hour_; 291 int minute_; 292 }; 293 294 class TimeComposer BASE_EMBEDDED { 295 public: TimeComposer()296 TimeComposer() : index_(0), hour_offset_(kNone) {} IsEmpty()297 bool IsEmpty() const { return index_ == 0; } IsExpecting(int n)298 bool IsExpecting(int n) const { 299 return (index_ == 1 && IsMinute(n)) || 300 (index_ == 2 && IsSecond(n)) || 301 (index_ == 3 && IsMillisecond(n)); 302 } Add(int n)303 bool Add(int n) { 304 return index_ < kSize ? (comp_[index_++] = n, true) : false; 305 } AddFinal(int n)306 bool AddFinal(int n) { 307 if (!Add(n)) return false; 308 while (index_ < kSize) comp_[index_++] = 0; 309 return true; 310 } SetHourOffset(int n)311 void SetHourOffset(int n) { hour_offset_ = n; } 312 bool Write(FixedArray* output); 313 IsMinute(int x)314 static bool IsMinute(int x) { return Between(x, 0, 59); } IsHour(int x)315 static bool IsHour(int x) { return Between(x, 0, 23); } IsSecond(int x)316 static bool IsSecond(int x) { return Between(x, 0, 59); } 317 318 private: IsHour12(int x)319 static bool IsHour12(int x) { return Between(x, 0, 12); } IsMillisecond(int x)320 static bool IsMillisecond(int x) { return Between(x, 0, 999); } 321 322 static const int kSize = 4; 323 int comp_[kSize]; 324 int index_; 325 int hour_offset_; 326 }; 327 328 class DayComposer BASE_EMBEDDED { 329 public: DayComposer()330 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {} IsEmpty()331 bool IsEmpty() const { return index_ == 0; } Add(int n)332 bool Add(int n) { 333 if (index_ < kSize) { 334 comp_[index_] = n; 335 index_++; 336 return true; 337 } 338 return false; 339 } SetNamedMonth(int n)340 void SetNamedMonth(int n) { named_month_ = n; } 341 bool Write(FixedArray* output); set_iso_date()342 void set_iso_date() { is_iso_date_ = true; } IsMonth(int x)343 static bool IsMonth(int x) { return Between(x, 1, 12); } IsDay(int x)344 static bool IsDay(int x) { return Between(x, 1, 31); } 345 346 private: 347 static const int kSize = 3; 348 int comp_[kSize]; 349 int index_; 350 int named_month_; 351 // If set, ensures that data is always parsed in year-month-date order. 352 bool is_iso_date_; 353 }; 354 355 // Tries to parse an ES5 Date Time String. Returns the next token 356 // to continue with in the legacy date string parser. If parsing is 357 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful, 358 // returns DateToken::Invalid(). Otherwise parsing continues in the 359 // legacy parser. 360 template <typename Char> 361 static DateParser::DateToken ParseES5DateTime( 362 DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time, 363 TimeZoneComposer* tz); 364 }; 365 366 367 } // namespace internal 368 } // namespace v8 369 370 #endif // V8_DATEPARSER_H_ 371