• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_DATE_DATEPARSER_H_
6 #define V8_DATE_DATEPARSER_H_
7 
8 #include "src/base/vector.h"
9 #include "src/strings/char-predicates.h"
10 #include "src/utils/allocation.h"
11 
12 namespace v8 {
13 namespace internal {
14 
15 class DateParser : public AllStatic {
16  public:
17   enum {
18     YEAR,
19     MONTH,
20     DAY,
21     HOUR,
22     MINUTE,
23     SECOND,
24     MILLISECOND,
25     UTC_OFFSET,
26     OUTPUT_SIZE
27   };
28 
29   // Parse the string as a date. If parsing succeeds, return true after
30   // filling out the output array as follows (all integers are Smis):
31   // [0]: year
32   // [1]: month (0 = Jan, 1 = Feb, ...)
33   // [2]: day
34   // [3]: hour
35   // [4]: minute
36   // [5]: second
37   // [6]: millisecond
38   // [7]: UTC offset in seconds, or null value if no timezone specified
39   // If parsing fails, return false (content of output array is not defined).
40   template <typename Char>
41   static bool Parse(Isolate* isolate, base::Vector<Char> str, double* output);
42 
43  private:
44   // Range testing
Between(int x,int lo,int hi)45   static inline bool Between(int x, int lo, int hi) {
46     return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
47   }
48 
49   // Indicates a missing value.
50   static const int kNone = kMaxInt;
51 
52   // Maximal number of digits used to build the value of a numeral.
53   // Remaining digits are ignored.
54   static const int kMaxSignificantDigits = 9;
55 
56   // InputReader provides basic string parsing and character classification.
57   template <typename Char>
58   class InputReader {
59    public:
InputReader(base::Vector<Char> s)60     explicit InputReader(base::Vector<Char> s) : index_(0), buffer_(s) {
61       Next();
62     }
63 
position()64     int position() { return index_; }
65 
66     // Advance to the next character of the string.
Next()67     void Next() {
68       ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
69       index_++;
70     }
71 
72     // Read a string of digits as an unsigned number. Cap value at
73     // kMaxSignificantDigits, but skip remaining digits if the numeral
74     // is longer.
ReadUnsignedNumeral()75     int ReadUnsignedNumeral() {
76       int n = 0;
77       int i = 0;
78       // First, skip leading zeros
79       while (ch_ == '0') Next();
80       // And then, do the conversion
81       while (IsAsciiDigit()) {
82         if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
83         i++;
84         Next();
85       }
86       return n;
87     }
88 
89     // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
90     // lower-case prefix, and pad any remainder of the buffer with zeroes.
91     // Return word length.
ReadWord(uint32_t * prefix,int prefix_size)92     int ReadWord(uint32_t* prefix, int prefix_size) {
93       int len;
94       for (len = 0; IsAsciiAlphaOrAbove() && !IsWhiteSpaceChar();
95            Next(), len++) {
96         if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
97       }
98       for (int i = len; i < prefix_size; i++) prefix[i] = 0;
99       return len;
100     }
101 
102     // The skip methods return whether they actually skipped something.
Skip(uint32_t c)103     bool Skip(uint32_t c) {
104       if (ch_ == c) {
105         Next();
106         return true;
107       }
108       return false;
109     }
110 
111     inline bool SkipWhiteSpace();
112     inline bool SkipParentheses();
113 
114     // Character testing/classification. Non-ASCII digits are not supported.
Is(uint32_t c)115     bool Is(uint32_t c) const { return ch_ == c; }
IsEnd()116     bool IsEnd() const { return ch_ == 0; }
IsAsciiDigit()117     bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
IsAsciiAlphaOrAbove()118     bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
IsWhiteSpaceChar()119     bool IsWhiteSpaceChar() const { return IsWhiteSpace(ch_); }
IsAsciiSign()120     bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
121 
122     // Return 1 for '+' and -1 for '-'.
GetAsciiSignValue()123     int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
124 
125    private:
126     int index_;
127     base::Vector<Char> buffer_;
128     uint32_t ch_;
129   };
130 
131   enum KeywordType {
132     INVALID,
133     MONTH_NAME,
134     TIME_ZONE_NAME,
135     TIME_SEPARATOR,
136     AM_PM
137   };
138 
139   struct DateToken {
140    public:
IsInvalidDateToken141     bool IsInvalid() { return tag_ == kInvalidTokenTag; }
IsUnknownDateToken142     bool IsUnknown() { return tag_ == kUnknownTokenTag; }
IsNumberDateToken143     bool IsNumber() { return tag_ == kNumberTag; }
IsSymbolDateToken144     bool IsSymbol() { return tag_ == kSymbolTag; }
IsWhiteSpaceDateToken145     bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
IsEndOfInputDateToken146     bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
IsKeywordDateToken147     bool IsKeyword() { return tag_ >= kKeywordTagStart; }
148 
lengthDateToken149     int length() { return length_; }
150 
numberDateToken151     int number() {
152       DCHECK(IsNumber());
153       return value_;
154     }
keyword_typeDateToken155     KeywordType keyword_type() {
156       DCHECK(IsKeyword());
157       return static_cast<KeywordType>(tag_);
158     }
keyword_valueDateToken159     int keyword_value() {
160       DCHECK(IsKeyword());
161       return value_;
162     }
symbolDateToken163     char symbol() {
164       DCHECK(IsSymbol());
165       return static_cast<char>(value_);
166     }
IsSymbolDateToken167     bool IsSymbol(char symbol) {
168       return IsSymbol() && this->symbol() == symbol;
169     }
IsKeywordTypeDateToken170     bool IsKeywordType(KeywordType tag) { return tag_ == tag; }
IsFixedLengthNumberDateToken171     bool IsFixedLengthNumber(int length) {
172       return IsNumber() && length_ == length;
173     }
IsAsciiSignDateToken174     bool IsAsciiSign() {
175       return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
176     }
ascii_signDateToken177     int ascii_sign() {
178       DCHECK(IsAsciiSign());
179       return 44 - value_;
180     }
IsKeywordZDateToken181     bool IsKeywordZ() {
182       return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
183     }
IsUnknownDateToken184     bool IsUnknown(int character) { return IsUnknown() && value_ == character; }
185     // Factory functions.
KeywordDateToken186     static DateToken Keyword(KeywordType tag, int value, int length) {
187       return DateToken(tag, length, value);
188     }
NumberDateToken189     static DateToken Number(int value, int length) {
190       return DateToken(kNumberTag, length, value);
191     }
SymbolDateToken192     static DateToken Symbol(char symbol) {
193       return DateToken(kSymbolTag, 1, symbol);
194     }
EndOfInputDateToken195     static DateToken EndOfInput() { return DateToken(kEndOfInputTag, 0, -1); }
WhiteSpaceDateToken196     static DateToken WhiteSpace(int length) {
197       return DateToken(kWhiteSpaceTag, length, -1);
198     }
UnknownDateToken199     static DateToken Unknown() { return DateToken(kUnknownTokenTag, 1, -1); }
InvalidDateToken200     static DateToken Invalid() { return DateToken(kInvalidTokenTag, 0, -1); }
201 
202    private:
203     enum TagType {
204       kInvalidTokenTag = -6,
205       kUnknownTokenTag = -5,
206       kWhiteSpaceTag = -4,
207       kNumberTag = -3,
208       kSymbolTag = -2,
209       kEndOfInputTag = -1,
210       kKeywordTagStart = 0
211     };
DateTokenDateToken212     DateToken(int tag, int length, int value)
213         : tag_(tag), length_(length), value_(value) {}
214 
215     int tag_;
216     int length_;  // Number of characters.
217     int value_;
218   };
219 
220   template <typename Char>
221   class DateStringTokenizer {
222    public:
DateStringTokenizer(InputReader<Char> * in)223     explicit DateStringTokenizer(InputReader<Char>* in)
224         : in_(in), next_(Scan()) {}
Next()225     DateToken Next() {
226       DateToken result = next_;
227       next_ = Scan();
228       return result;
229     }
230 
Peek()231     DateToken Peek() { return next_; }
SkipSymbol(char symbol)232     bool SkipSymbol(char symbol) {
233       if (next_.IsSymbol(symbol)) {
234         next_ = Scan();
235         return true;
236       }
237       return false;
238     }
239 
240    private:
241     DateToken Scan();
242 
243     InputReader<Char>* in_;
244     DateToken next_;
245   };
246 
247   static int ReadMilliseconds(DateToken number);
248 
249   // KeywordTable maps names of months, time zones, am/pm to numbers.
250   class KeywordTable : public AllStatic {
251    public:
252     // Look up a word in the keyword table and return an index.
253     // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
254     // and 'len' is the word length.
255     static int Lookup(const uint32_t* pre, int len);
256     // Get the type of the keyword at index i.
GetType(int i)257     static KeywordType GetType(int i) {
258       return static_cast<KeywordType>(array[i][kTypeOffset]);
259     }
260     // Get the value of the keyword at index i.
GetValue(int i)261     static int GetValue(int i) { return array[i][kValueOffset]; }
262 
263     static const int kPrefixLength = 3;
264     static const int kTypeOffset = kPrefixLength;
265     static const int kValueOffset = kTypeOffset + 1;
266     static const int kEntrySize = kValueOffset + 1;
267     static const int8_t array[][kEntrySize];
268   };
269 
270   class TimeZoneComposer {
271    public:
TimeZoneComposer()272     TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
Set(int offset_in_hours)273     void Set(int offset_in_hours) {
274       sign_ = offset_in_hours < 0 ? -1 : 1;
275       hour_ = offset_in_hours * sign_;
276       minute_ = 0;
277     }
SetSign(int sign)278     void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
SetAbsoluteHour(int hour)279     void SetAbsoluteHour(int hour) { hour_ = hour; }
SetAbsoluteMinute(int minute)280     void SetAbsoluteMinute(int minute) { minute_ = minute; }
IsExpecting(int n)281     bool IsExpecting(int n) const {
282       return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
283     }
IsUTC()284     bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
285     bool Write(double* output);
IsEmpty()286     bool IsEmpty() { return hour_ == kNone; }
287 
288    private:
289     int sign_;
290     int hour_;
291     int minute_;
292   };
293 
294   class TimeComposer {
295    public:
TimeComposer()296     TimeComposer() : index_(0), hour_offset_(kNone) {}
IsEmpty()297     bool IsEmpty() const { return index_ == 0; }
IsExpecting(int n)298     bool IsExpecting(int n) const {
299       return (index_ == 1 && IsMinute(n)) || (index_ == 2 && IsSecond(n)) ||
300              (index_ == 3 && IsMillisecond(n));
301     }
Add(int n)302     bool Add(int n) {
303       return index_ < kSize ? (comp_[index_++] = n, true) : false;
304     }
AddFinal(int n)305     bool AddFinal(int n) {
306       if (!Add(n)) return false;
307       while (index_ < kSize) comp_[index_++] = 0;
308       return true;
309     }
SetHourOffset(int n)310     void SetHourOffset(int n) { hour_offset_ = n; }
311     bool Write(double* output);
312 
IsMinute(int x)313     static bool IsMinute(int x) { return Between(x, 0, 59); }
IsHour(int x)314     static bool IsHour(int x) { return Between(x, 0, 23); }
IsSecond(int x)315     static bool IsSecond(int x) { return Between(x, 0, 59); }
316 
317    private:
IsHour12(int x)318     static bool IsHour12(int x) { return Between(x, 0, 12); }
IsMillisecond(int x)319     static bool IsMillisecond(int x) { return Between(x, 0, 999); }
320 
321     static const int kSize = 4;
322     int comp_[kSize];
323     int index_;
324     int hour_offset_;
325   };
326 
327   class DayComposer {
328    public:
DayComposer()329     DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
IsEmpty()330     bool IsEmpty() const { return index_ == 0; }
Add(int n)331     bool Add(int n) {
332       if (index_ < kSize) {
333         comp_[index_] = n;
334         index_++;
335         return true;
336       }
337       return false;
338     }
SetNamedMonth(int n)339     void SetNamedMonth(int n) { named_month_ = n; }
340     bool Write(double* output);
set_iso_date()341     void set_iso_date() { is_iso_date_ = true; }
IsMonth(int x)342     static bool IsMonth(int x) { return Between(x, 1, 12); }
IsDay(int x)343     static bool IsDay(int x) { return Between(x, 1, 31); }
344 
345    private:
346     static const int kSize = 3;
347     int comp_[kSize];
348     int index_;
349     int named_month_;
350     // If set, ensures that data is always parsed in year-month-date order.
351     bool is_iso_date_;
352   };
353 
354   // Tries to parse an ES5 Date Time String. Returns the next token
355   // to continue with in the legacy date string parser. If parsing is
356   // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
357   // returns DateToken::Invalid(). Otherwise parsing continues in the
358   // legacy parser.
359   template <typename Char>
360   static DateParser::DateToken ParseES5DateTime(
361       DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
362       TimeZoneComposer* tz);
363 };
364 
365 }  // namespace internal
366 }  // namespace v8
367 
368 #endif  // V8_DATE_DATEPARSER_H_
369