1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_DATE_DATEPARSER_H_ 6 #define V8_DATE_DATEPARSER_H_ 7 8 #include "src/base/vector.h" 9 #include "src/strings/char-predicates.h" 10 #include "src/utils/allocation.h" 11 12 namespace v8 { 13 namespace internal { 14 15 class DateParser : public AllStatic { 16 public: 17 enum { 18 YEAR, 19 MONTH, 20 DAY, 21 HOUR, 22 MINUTE, 23 SECOND, 24 MILLISECOND, 25 UTC_OFFSET, 26 OUTPUT_SIZE 27 }; 28 29 // Parse the string as a date. If parsing succeeds, return true after 30 // filling out the output array as follows (all integers are Smis): 31 // [0]: year 32 // [1]: month (0 = Jan, 1 = Feb, ...) 33 // [2]: day 34 // [3]: hour 35 // [4]: minute 36 // [5]: second 37 // [6]: millisecond 38 // [7]: UTC offset in seconds, or null value if no timezone specified 39 // If parsing fails, return false (content of output array is not defined). 40 template <typename Char> 41 static bool Parse(Isolate* isolate, base::Vector<Char> str, double* output); 42 43 private: 44 // Range testing Between(int x,int lo,int hi)45 static inline bool Between(int x, int lo, int hi) { 46 return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo); 47 } 48 49 // Indicates a missing value. 50 static const int kNone = kMaxInt; 51 52 // Maximal number of digits used to build the value of a numeral. 53 // Remaining digits are ignored. 54 static const int kMaxSignificantDigits = 9; 55 56 // InputReader provides basic string parsing and character classification. 57 template <typename Char> 58 class InputReader { 59 public: InputReader(base::Vector<Char> s)60 explicit InputReader(base::Vector<Char> s) : index_(0), buffer_(s) { 61 Next(); 62 } 63 position()64 int position() { return index_; } 65 66 // Advance to the next character of the string. Next()67 void Next() { 68 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0; 69 index_++; 70 } 71 72 // Read a string of digits as an unsigned number. Cap value at 73 // kMaxSignificantDigits, but skip remaining digits if the numeral 74 // is longer. ReadUnsignedNumeral()75 int ReadUnsignedNumeral() { 76 int n = 0; 77 int i = 0; 78 // First, skip leading zeros 79 while (ch_ == '0') Next(); 80 // And then, do the conversion 81 while (IsAsciiDigit()) { 82 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0'; 83 i++; 84 Next(); 85 } 86 return n; 87 } 88 89 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a 90 // lower-case prefix, and pad any remainder of the buffer with zeroes. 91 // Return word length. ReadWord(uint32_t * prefix,int prefix_size)92 int ReadWord(uint32_t* prefix, int prefix_size) { 93 int len; 94 for (len = 0; IsAsciiAlphaOrAbove() && !IsWhiteSpaceChar(); 95 Next(), len++) { 96 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_); 97 } 98 for (int i = len; i < prefix_size; i++) prefix[i] = 0; 99 return len; 100 } 101 102 // The skip methods return whether they actually skipped something. Skip(uint32_t c)103 bool Skip(uint32_t c) { 104 if (ch_ == c) { 105 Next(); 106 return true; 107 } 108 return false; 109 } 110 111 inline bool SkipWhiteSpace(); 112 inline bool SkipParentheses(); 113 114 // Character testing/classification. Non-ASCII digits are not supported. Is(uint32_t c)115 bool Is(uint32_t c) const { return ch_ == c; } IsEnd()116 bool IsEnd() const { return ch_ == 0; } IsAsciiDigit()117 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); } IsAsciiAlphaOrAbove()118 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; } IsWhiteSpaceChar()119 bool IsWhiteSpaceChar() const { return IsWhiteSpace(ch_); } IsAsciiSign()120 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; } 121 122 // Return 1 for '+' and -1 for '-'. GetAsciiSignValue()123 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } 124 125 private: 126 int index_; 127 base::Vector<Char> buffer_; 128 uint32_t ch_; 129 }; 130 131 enum KeywordType { 132 INVALID, 133 MONTH_NAME, 134 TIME_ZONE_NAME, 135 TIME_SEPARATOR, 136 AM_PM 137 }; 138 139 struct DateToken { 140 public: IsInvalidDateToken141 bool IsInvalid() { return tag_ == kInvalidTokenTag; } IsUnknownDateToken142 bool IsUnknown() { return tag_ == kUnknownTokenTag; } IsNumberDateToken143 bool IsNumber() { return tag_ == kNumberTag; } IsSymbolDateToken144 bool IsSymbol() { return tag_ == kSymbolTag; } IsWhiteSpaceDateToken145 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; } IsEndOfInputDateToken146 bool IsEndOfInput() { return tag_ == kEndOfInputTag; } IsKeywordDateToken147 bool IsKeyword() { return tag_ >= kKeywordTagStart; } 148 lengthDateToken149 int length() { return length_; } 150 numberDateToken151 int number() { 152 DCHECK(IsNumber()); 153 return value_; 154 } keyword_typeDateToken155 KeywordType keyword_type() { 156 DCHECK(IsKeyword()); 157 return static_cast<KeywordType>(tag_); 158 } keyword_valueDateToken159 int keyword_value() { 160 DCHECK(IsKeyword()); 161 return value_; 162 } symbolDateToken163 char symbol() { 164 DCHECK(IsSymbol()); 165 return static_cast<char>(value_); 166 } IsSymbolDateToken167 bool IsSymbol(char symbol) { 168 return IsSymbol() && this->symbol() == symbol; 169 } IsKeywordTypeDateToken170 bool IsKeywordType(KeywordType tag) { return tag_ == tag; } IsFixedLengthNumberDateToken171 bool IsFixedLengthNumber(int length) { 172 return IsNumber() && length_ == length; 173 } IsAsciiSignDateToken174 bool IsAsciiSign() { 175 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+'); 176 } ascii_signDateToken177 int ascii_sign() { 178 DCHECK(IsAsciiSign()); 179 return 44 - value_; 180 } IsKeywordZDateToken181 bool IsKeywordZ() { 182 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0; 183 } IsUnknownDateToken184 bool IsUnknown(int character) { return IsUnknown() && value_ == character; } 185 // Factory functions. KeywordDateToken186 static DateToken Keyword(KeywordType tag, int value, int length) { 187 return DateToken(tag, length, value); 188 } NumberDateToken189 static DateToken Number(int value, int length) { 190 return DateToken(kNumberTag, length, value); 191 } SymbolDateToken192 static DateToken Symbol(char symbol) { 193 return DateToken(kSymbolTag, 1, symbol); 194 } EndOfInputDateToken195 static DateToken EndOfInput() { return DateToken(kEndOfInputTag, 0, -1); } WhiteSpaceDateToken196 static DateToken WhiteSpace(int length) { 197 return DateToken(kWhiteSpaceTag, length, -1); 198 } UnknownDateToken199 static DateToken Unknown() { return DateToken(kUnknownTokenTag, 1, -1); } InvalidDateToken200 static DateToken Invalid() { return DateToken(kInvalidTokenTag, 0, -1); } 201 202 private: 203 enum TagType { 204 kInvalidTokenTag = -6, 205 kUnknownTokenTag = -5, 206 kWhiteSpaceTag = -4, 207 kNumberTag = -3, 208 kSymbolTag = -2, 209 kEndOfInputTag = -1, 210 kKeywordTagStart = 0 211 }; DateTokenDateToken212 DateToken(int tag, int length, int value) 213 : tag_(tag), length_(length), value_(value) {} 214 215 int tag_; 216 int length_; // Number of characters. 217 int value_; 218 }; 219 220 template <typename Char> 221 class DateStringTokenizer { 222 public: DateStringTokenizer(InputReader<Char> * in)223 explicit DateStringTokenizer(InputReader<Char>* in) 224 : in_(in), next_(Scan()) {} Next()225 DateToken Next() { 226 DateToken result = next_; 227 next_ = Scan(); 228 return result; 229 } 230 Peek()231 DateToken Peek() { return next_; } SkipSymbol(char symbol)232 bool SkipSymbol(char symbol) { 233 if (next_.IsSymbol(symbol)) { 234 next_ = Scan(); 235 return true; 236 } 237 return false; 238 } 239 240 private: 241 DateToken Scan(); 242 243 InputReader<Char>* in_; 244 DateToken next_; 245 }; 246 247 static int ReadMilliseconds(DateToken number); 248 249 // KeywordTable maps names of months, time zones, am/pm to numbers. 250 class KeywordTable : public AllStatic { 251 public: 252 // Look up a word in the keyword table and return an index. 253 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength 254 // and 'len' is the word length. 255 static int Lookup(const uint32_t* pre, int len); 256 // Get the type of the keyword at index i. GetType(int i)257 static KeywordType GetType(int i) { 258 return static_cast<KeywordType>(array[i][kTypeOffset]); 259 } 260 // Get the value of the keyword at index i. GetValue(int i)261 static int GetValue(int i) { return array[i][kValueOffset]; } 262 263 static const int kPrefixLength = 3; 264 static const int kTypeOffset = kPrefixLength; 265 static const int kValueOffset = kTypeOffset + 1; 266 static const int kEntrySize = kValueOffset + 1; 267 static const int8_t array[][kEntrySize]; 268 }; 269 270 class TimeZoneComposer { 271 public: TimeZoneComposer()272 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {} Set(int offset_in_hours)273 void Set(int offset_in_hours) { 274 sign_ = offset_in_hours < 0 ? -1 : 1; 275 hour_ = offset_in_hours * sign_; 276 minute_ = 0; 277 } SetSign(int sign)278 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; } SetAbsoluteHour(int hour)279 void SetAbsoluteHour(int hour) { hour_ = hour; } SetAbsoluteMinute(int minute)280 void SetAbsoluteMinute(int minute) { minute_ = minute; } IsExpecting(int n)281 bool IsExpecting(int n) const { 282 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n); 283 } IsUTC()284 bool IsUTC() const { return hour_ == 0 && minute_ == 0; } 285 bool Write(double* output); IsEmpty()286 bool IsEmpty() { return hour_ == kNone; } 287 288 private: 289 int sign_; 290 int hour_; 291 int minute_; 292 }; 293 294 class TimeComposer { 295 public: TimeComposer()296 TimeComposer() : index_(0), hour_offset_(kNone) {} IsEmpty()297 bool IsEmpty() const { return index_ == 0; } IsExpecting(int n)298 bool IsExpecting(int n) const { 299 return (index_ == 1 && IsMinute(n)) || (index_ == 2 && IsSecond(n)) || 300 (index_ == 3 && IsMillisecond(n)); 301 } Add(int n)302 bool Add(int n) { 303 return index_ < kSize ? (comp_[index_++] = n, true) : false; 304 } AddFinal(int n)305 bool AddFinal(int n) { 306 if (!Add(n)) return false; 307 while (index_ < kSize) comp_[index_++] = 0; 308 return true; 309 } SetHourOffset(int n)310 void SetHourOffset(int n) { hour_offset_ = n; } 311 bool Write(double* output); 312 IsMinute(int x)313 static bool IsMinute(int x) { return Between(x, 0, 59); } IsHour(int x)314 static bool IsHour(int x) { return Between(x, 0, 23); } IsSecond(int x)315 static bool IsSecond(int x) { return Between(x, 0, 59); } 316 317 private: IsHour12(int x)318 static bool IsHour12(int x) { return Between(x, 0, 12); } IsMillisecond(int x)319 static bool IsMillisecond(int x) { return Between(x, 0, 999); } 320 321 static const int kSize = 4; 322 int comp_[kSize]; 323 int index_; 324 int hour_offset_; 325 }; 326 327 class DayComposer { 328 public: DayComposer()329 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {} IsEmpty()330 bool IsEmpty() const { return index_ == 0; } Add(int n)331 bool Add(int n) { 332 if (index_ < kSize) { 333 comp_[index_] = n; 334 index_++; 335 return true; 336 } 337 return false; 338 } SetNamedMonth(int n)339 void SetNamedMonth(int n) { named_month_ = n; } 340 bool Write(double* output); set_iso_date()341 void set_iso_date() { is_iso_date_ = true; } IsMonth(int x)342 static bool IsMonth(int x) { return Between(x, 1, 12); } IsDay(int x)343 static bool IsDay(int x) { return Between(x, 1, 31); } 344 345 private: 346 static const int kSize = 3; 347 int comp_[kSize]; 348 int index_; 349 int named_month_; 350 // If set, ensures that data is always parsed in year-month-date order. 351 bool is_iso_date_; 352 }; 353 354 // Tries to parse an ES5 Date Time String. Returns the next token 355 // to continue with in the legacy date string parser. If parsing is 356 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful, 357 // returns DateToken::Invalid(). Otherwise parsing continues in the 358 // legacy parser. 359 template <typename Char> 360 static DateParser::DateToken ParseES5DateTime( 361 DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time, 362 TimeZoneComposer* tz); 363 }; 364 365 } // namespace internal 366 } // namespace v8 367 368 #endif // V8_DATE_DATEPARSER_H_ 369