1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_DATEPARSER_H_ 6 #define V8_DATEPARSER_H_ 7 8 #include "src/allocation.h" 9 #include "src/char-predicates-inl.h" 10 11 namespace v8 { 12 namespace internal { 13 14 class DateParser : public AllStatic { 15 public: 16 // Parse the string as a date. If parsing succeeds, return true after 17 // filling out the output array as follows (all integers are Smis): 18 // [0]: year 19 // [1]: month (0 = Jan, 1 = Feb, ...) 20 // [2]: day 21 // [3]: hour 22 // [4]: minute 23 // [5]: second 24 // [6]: millisecond 25 // [7]: UTC offset in seconds, or null value if no timezone specified 26 // If parsing fails, return false (content of output array is not defined). 27 template <typename Char> 28 static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache); 29 30 enum { 31 YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE 32 }; 33 34 private: 35 // Range testing Between(int x,int lo,int hi)36 static inline bool Between(int x, int lo, int hi) { 37 return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo); 38 } 39 40 // Indicates a missing value. 41 static const int kNone = kMaxInt; 42 43 // Maximal number of digits used to build the value of a numeral. 44 // Remaining digits are ignored. 45 static const int kMaxSignificantDigits = 9; 46 47 // InputReader provides basic string parsing and character classification. 48 template <typename Char> 49 class InputReader BASE_EMBEDDED { 50 public: InputReader(UnicodeCache * unicode_cache,Vector<Char> s)51 InputReader(UnicodeCache* unicode_cache, Vector<Char> s) 52 : index_(0), 53 buffer_(s), 54 unicode_cache_(unicode_cache) { 55 Next(); 56 } 57 position()58 int position() { return index_; } 59 60 // Advance to the next character of the string. Next()61 void Next() { 62 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0; 63 index_++; 64 } 65 66 // Read a string of digits as an unsigned number. Cap value at 67 // kMaxSignificantDigits, but skip remaining digits if the numeral 68 // is longer. ReadUnsignedNumeral()69 int ReadUnsignedNumeral() { 70 int n = 0; 71 int i = 0; 72 while (IsAsciiDigit()) { 73 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0'; 74 i++; 75 Next(); 76 } 77 return n; 78 } 79 80 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a 81 // lower-case prefix, and pad any remainder of the buffer with zeroes. 82 // Return word length. ReadWord(uint32_t * prefix,int prefix_size)83 int ReadWord(uint32_t* prefix, int prefix_size) { 84 int len; 85 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) { 86 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_); 87 } 88 for (int i = len; i < prefix_size; i++) prefix[i] = 0; 89 return len; 90 } 91 92 // The skip methods return whether they actually skipped something. Skip(uint32_t c)93 bool Skip(uint32_t c) { 94 if (ch_ == c) { 95 Next(); 96 return true; 97 } 98 return false; 99 } 100 SkipWhiteSpace()101 bool SkipWhiteSpace() { 102 if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) { 103 Next(); 104 return true; 105 } 106 return false; 107 } 108 SkipParentheses()109 bool SkipParentheses() { 110 if (ch_ != '(') return false; 111 int balance = 0; 112 do { 113 if (ch_ == ')') --balance; 114 else if (ch_ == '(') ++balance; 115 Next(); 116 } while (balance > 0 && ch_); 117 return true; 118 } 119 120 // Character testing/classification. Non-ASCII digits are not supported. Is(uint32_t c)121 bool Is(uint32_t c) const { return ch_ == c; } IsEnd()122 bool IsEnd() const { return ch_ == 0; } IsAsciiDigit()123 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); } IsAsciiAlphaOrAbove()124 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; } IsAsciiSign()125 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; } 126 127 // Return 1 for '+' and -1 for '-'. GetAsciiSignValue()128 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } 129 130 private: 131 int index_; 132 Vector<Char> buffer_; 133 uint32_t ch_; 134 UnicodeCache* unicode_cache_; 135 }; 136 137 enum KeywordType { 138 INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM 139 }; 140 141 struct DateToken { 142 public: IsInvalidDateToken143 bool IsInvalid() { return tag_ == kInvalidTokenTag; } IsUnknownDateToken144 bool IsUnknown() { return tag_ == kUnknownTokenTag; } IsNumberDateToken145 bool IsNumber() { return tag_ == kNumberTag; } IsSymbolDateToken146 bool IsSymbol() { return tag_ == kSymbolTag; } IsWhiteSpaceDateToken147 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; } IsEndOfInputDateToken148 bool IsEndOfInput() { return tag_ == kEndOfInputTag; } IsKeywordDateToken149 bool IsKeyword() { return tag_ >= kKeywordTagStart; } 150 lengthDateToken151 int length() { return length_; } 152 numberDateToken153 int number() { 154 ASSERT(IsNumber()); 155 return value_; 156 } keyword_typeDateToken157 KeywordType keyword_type() { 158 ASSERT(IsKeyword()); 159 return static_cast<KeywordType>(tag_); 160 } keyword_valueDateToken161 int keyword_value() { 162 ASSERT(IsKeyword()); 163 return value_; 164 } symbolDateToken165 char symbol() { 166 ASSERT(IsSymbol()); 167 return static_cast<char>(value_); 168 } IsSymbolDateToken169 bool IsSymbol(char symbol) { 170 return IsSymbol() && this->symbol() == symbol; 171 } IsKeywordTypeDateToken172 bool IsKeywordType(KeywordType tag) { 173 return tag_ == tag; 174 } IsFixedLengthNumberDateToken175 bool IsFixedLengthNumber(int length) { 176 return IsNumber() && length_ == length; 177 } IsAsciiSignDateToken178 bool IsAsciiSign() { 179 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+'); 180 } ascii_signDateToken181 int ascii_sign() { 182 ASSERT(IsAsciiSign()); 183 return 44 - value_; 184 } IsKeywordZDateToken185 bool IsKeywordZ() { 186 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0; 187 } IsUnknownDateToken188 bool IsUnknown(int character) { 189 return IsUnknown() && value_ == character; 190 } 191 // Factory functions. KeywordDateToken192 static DateToken Keyword(KeywordType tag, int value, int length) { 193 return DateToken(tag, length, value); 194 } NumberDateToken195 static DateToken Number(int value, int length) { 196 return DateToken(kNumberTag, length, value); 197 } SymbolDateToken198 static DateToken Symbol(char symbol) { 199 return DateToken(kSymbolTag, 1, symbol); 200 } EndOfInputDateToken201 static DateToken EndOfInput() { 202 return DateToken(kEndOfInputTag, 0, -1); 203 } WhiteSpaceDateToken204 static DateToken WhiteSpace(int length) { 205 return DateToken(kWhiteSpaceTag, length, -1); 206 } UnknownDateToken207 static DateToken Unknown() { 208 return DateToken(kUnknownTokenTag, 1, -1); 209 } InvalidDateToken210 static DateToken Invalid() { 211 return DateToken(kInvalidTokenTag, 0, -1); 212 } 213 214 private: 215 enum TagType { 216 kInvalidTokenTag = -6, 217 kUnknownTokenTag = -5, 218 kWhiteSpaceTag = -4, 219 kNumberTag = -3, 220 kSymbolTag = -2, 221 kEndOfInputTag = -1, 222 kKeywordTagStart = 0 223 }; DateTokenDateToken224 DateToken(int tag, int length, int value) 225 : tag_(tag), 226 length_(length), 227 value_(value) { } 228 229 int tag_; 230 int length_; // Number of characters. 231 int value_; 232 }; 233 234 template <typename Char> 235 class DateStringTokenizer { 236 public: DateStringTokenizer(InputReader<Char> * in)237 explicit DateStringTokenizer(InputReader<Char>* in) 238 : in_(in), next_(Scan()) { } Next()239 DateToken Next() { 240 DateToken result = next_; 241 next_ = Scan(); 242 return result; 243 } 244 Peek()245 DateToken Peek() { 246 return next_; 247 } SkipSymbol(char symbol)248 bool SkipSymbol(char symbol) { 249 if (next_.IsSymbol(symbol)) { 250 next_ = Scan(); 251 return true; 252 } 253 return false; 254 } 255 256 private: 257 DateToken Scan(); 258 259 InputReader<Char>* in_; 260 DateToken next_; 261 }; 262 263 static int ReadMilliseconds(DateToken number); 264 265 // KeywordTable maps names of months, time zones, am/pm to numbers. 266 class KeywordTable : public AllStatic { 267 public: 268 // Look up a word in the keyword table and return an index. 269 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength 270 // and 'len' is the word length. 271 static int Lookup(const uint32_t* pre, int len); 272 // Get the type of the keyword at index i. GetType(int i)273 static KeywordType GetType(int i) { 274 return static_cast<KeywordType>(array[i][kTypeOffset]); 275 } 276 // Get the value of the keyword at index i. GetValue(int i)277 static int GetValue(int i) { return array[i][kValueOffset]; } 278 279 static const int kPrefixLength = 3; 280 static const int kTypeOffset = kPrefixLength; 281 static const int kValueOffset = kTypeOffset + 1; 282 static const int kEntrySize = kValueOffset + 1; 283 static const int8_t array[][kEntrySize]; 284 }; 285 286 class TimeZoneComposer BASE_EMBEDDED { 287 public: TimeZoneComposer()288 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {} Set(int offset_in_hours)289 void Set(int offset_in_hours) { 290 sign_ = offset_in_hours < 0 ? -1 : 1; 291 hour_ = offset_in_hours * sign_; 292 minute_ = 0; 293 } SetSign(int sign)294 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; } SetAbsoluteHour(int hour)295 void SetAbsoluteHour(int hour) { hour_ = hour; } SetAbsoluteMinute(int minute)296 void SetAbsoluteMinute(int minute) { minute_ = minute; } IsExpecting(int n)297 bool IsExpecting(int n) const { 298 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n); 299 } IsUTC()300 bool IsUTC() const { return hour_ == 0 && minute_ == 0; } 301 bool Write(FixedArray* output); IsEmpty()302 bool IsEmpty() { return hour_ == kNone; } 303 private: 304 int sign_; 305 int hour_; 306 int minute_; 307 }; 308 309 class TimeComposer BASE_EMBEDDED { 310 public: TimeComposer()311 TimeComposer() : index_(0), hour_offset_(kNone) {} IsEmpty()312 bool IsEmpty() const { return index_ == 0; } IsExpecting(int n)313 bool IsExpecting(int n) const { 314 return (index_ == 1 && IsMinute(n)) || 315 (index_ == 2 && IsSecond(n)) || 316 (index_ == 3 && IsMillisecond(n)); 317 } Add(int n)318 bool Add(int n) { 319 return index_ < kSize ? (comp_[index_++] = n, true) : false; 320 } AddFinal(int n)321 bool AddFinal(int n) { 322 if (!Add(n)) return false; 323 while (index_ < kSize) comp_[index_++] = 0; 324 return true; 325 } SetHourOffset(int n)326 void SetHourOffset(int n) { hour_offset_ = n; } 327 bool Write(FixedArray* output); 328 IsMinute(int x)329 static bool IsMinute(int x) { return Between(x, 0, 59); } IsHour(int x)330 static bool IsHour(int x) { return Between(x, 0, 23); } IsSecond(int x)331 static bool IsSecond(int x) { return Between(x, 0, 59); } 332 333 private: IsHour12(int x)334 static bool IsHour12(int x) { return Between(x, 0, 12); } IsMillisecond(int x)335 static bool IsMillisecond(int x) { return Between(x, 0, 999); } 336 337 static const int kSize = 4; 338 int comp_[kSize]; 339 int index_; 340 int hour_offset_; 341 }; 342 343 class DayComposer BASE_EMBEDDED { 344 public: DayComposer()345 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {} IsEmpty()346 bool IsEmpty() const { return index_ == 0; } Add(int n)347 bool Add(int n) { 348 if (index_ < kSize) { 349 comp_[index_] = n; 350 index_++; 351 return true; 352 } 353 return false; 354 } SetNamedMonth(int n)355 void SetNamedMonth(int n) { named_month_ = n; } 356 bool Write(FixedArray* output); set_iso_date()357 void set_iso_date() { is_iso_date_ = true; } IsMonth(int x)358 static bool IsMonth(int x) { return Between(x, 1, 12); } IsDay(int x)359 static bool IsDay(int x) { return Between(x, 1, 31); } 360 361 private: 362 static const int kSize = 3; 363 int comp_[kSize]; 364 int index_; 365 int named_month_; 366 // If set, ensures that data is always parsed in year-month-date order. 367 bool is_iso_date_; 368 }; 369 370 // Tries to parse an ES5 Date Time String. Returns the next token 371 // to continue with in the legacy date string parser. If parsing is 372 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful, 373 // returns DateToken::Invalid(). Otherwise parsing continues in the 374 // legacy parser. 375 template <typename Char> 376 static DateParser::DateToken ParseES5DateTime( 377 DateStringTokenizer<Char>* scanner, 378 DayComposer* day, 379 TimeComposer* time, 380 TimeZoneComposer* tz); 381 }; 382 383 384 } } // namespace v8::internal 385 386 #endif // V8_DATEPARSER_H_ 387