• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 //     * Redistributions of source code must retain the above copyright
7 //       notice, this list of conditions and the following disclaimer.
8 //     * Redistributions in binary form must reproduce the above
9 //       copyright notice, this list of conditions and the following
10 //       disclaimer in the documentation and/or other materials provided
11 //       with the distribution.
12 //     * Neither the name of Google Inc. nor the names of its
13 //       contributors may be used to endorse or promote products derived
14 //       from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_DATEPARSER_H_
29 #define V8_DATEPARSER_H_
30 
31 #include "allocation.h"
32 #include "char-predicates-inl.h"
33 
34 namespace v8 {
35 namespace internal {
36 
37 class DateParser : public AllStatic {
38  public:
39   // Parse the string as a date. If parsing succeeds, return true after
40   // filling out the output array as follows (all integers are Smis):
41   // [0]: year
42   // [1]: month (0 = Jan, 1 = Feb, ...)
43   // [2]: day
44   // [3]: hour
45   // [4]: minute
46   // [5]: second
47   // [6]: millisecond
48   // [7]: UTC offset in seconds, or null value if no timezone specified
49   // If parsing fails, return false (content of output array is not defined).
50   template <typename Char>
51   static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
52 
53   enum {
54     YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
55   };
56 
57  private:
58   // Range testing
Between(int x,int lo,int hi)59   static inline bool Between(int x, int lo, int hi) {
60     return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
61   }
62 
63   // Indicates a missing value.
64   static const int kNone = kMaxInt;
65 
66   // Maximal number of digits used to build the value of a numeral.
67   // Remaining digits are ignored.
68   static const int kMaxSignificantDigits = 9;
69 
70   // InputReader provides basic string parsing and character classification.
71   template <typename Char>
72   class InputReader BASE_EMBEDDED {
73    public:
InputReader(UnicodeCache * unicode_cache,Vector<Char> s)74     InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
75         : index_(0),
76           buffer_(s),
77           unicode_cache_(unicode_cache) {
78       Next();
79     }
80 
position()81     int position() { return index_; }
82 
83     // Advance to the next character of the string.
Next()84     void Next() {
85       ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
86       index_++;
87     }
88 
89     // Read a string of digits as an unsigned number. Cap value at
90     // kMaxSignificantDigits, but skip remaining digits if the numeral
91     // is longer.
ReadUnsignedNumeral()92     int ReadUnsignedNumeral() {
93       int n = 0;
94       int i = 0;
95       while (IsAsciiDigit()) {
96         if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
97         i++;
98         Next();
99       }
100       return n;
101     }
102 
103     // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
104     // lower-case prefix, and pad any remainder of the buffer with zeroes.
105     // Return word length.
ReadWord(uint32_t * prefix,int prefix_size)106     int ReadWord(uint32_t* prefix, int prefix_size) {
107       int len;
108       for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
109         if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
110       }
111       for (int i = len; i < prefix_size; i++) prefix[i] = 0;
112       return len;
113     }
114 
115     // The skip methods return whether they actually skipped something.
Skip(uint32_t c)116     bool Skip(uint32_t c) {
117       if (ch_ == c) {
118         Next();
119         return true;
120       }
121       return false;
122     }
123 
SkipWhiteSpace()124     bool SkipWhiteSpace() {
125       if (unicode_cache_->IsWhiteSpace(ch_)) {
126         Next();
127         return true;
128       }
129       return false;
130     }
131 
SkipParentheses()132     bool SkipParentheses() {
133       if (ch_ != '(') return false;
134       int balance = 0;
135       do {
136         if (ch_ == ')') --balance;
137         else if (ch_ == '(') ++balance;
138         Next();
139       } while (balance > 0 && ch_);
140       return true;
141     }
142 
143     // Character testing/classification. Non-ASCII digits are not supported.
Is(uint32_t c)144     bool Is(uint32_t c) const { return ch_ == c; }
IsEnd()145     bool IsEnd() const { return ch_ == 0; }
IsAsciiDigit()146     bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
IsAsciiAlphaOrAbove()147     bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
IsAsciiSign()148     bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
149 
150     // Return 1 for '+' and -1 for '-'.
GetAsciiSignValue()151     int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
152 
153    private:
154     int index_;
155     Vector<Char> buffer_;
156     uint32_t ch_;
157     UnicodeCache* unicode_cache_;
158   };
159 
160   enum KeywordType {
161       INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
162   };
163 
164   struct DateToken {
165    public:
IsInvalidDateToken166     bool IsInvalid() { return tag_ == kInvalidTokenTag; }
IsUnknownDateToken167     bool IsUnknown() { return tag_ == kUnknownTokenTag; }
IsNumberDateToken168     bool IsNumber() { return tag_ == kNumberTag; }
IsSymbolDateToken169     bool IsSymbol() { return tag_ == kSymbolTag; }
IsWhiteSpaceDateToken170     bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
IsEndOfInputDateToken171     bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
IsKeywordDateToken172     bool IsKeyword() { return tag_ >= kKeywordTagStart; }
173 
lengthDateToken174     int length() { return length_; }
175 
numberDateToken176     int number() {
177       ASSERT(IsNumber());
178       return value_;
179     }
keyword_typeDateToken180     KeywordType keyword_type() {
181       ASSERT(IsKeyword());
182       return static_cast<KeywordType>(tag_);
183     }
keyword_valueDateToken184     int keyword_value() {
185       ASSERT(IsKeyword());
186       return value_;
187     }
symbolDateToken188     char symbol() {
189       ASSERT(IsSymbol());
190       return static_cast<char>(value_);
191     }
IsSymbolDateToken192     bool IsSymbol(char symbol) {
193       return IsSymbol() && this->symbol() == symbol;
194     }
IsKeywordTypeDateToken195     bool IsKeywordType(KeywordType tag) {
196       return tag_ == tag;
197     }
IsFixedLengthNumberDateToken198     bool IsFixedLengthNumber(int length) {
199       return IsNumber() && length_ == length;
200     }
IsAsciiSignDateToken201     bool IsAsciiSign() {
202       return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
203     }
ascii_signDateToken204     int ascii_sign() {
205       ASSERT(IsAsciiSign());
206       return 44 - value_;
207     }
IsKeywordZDateToken208     bool IsKeywordZ() {
209       return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
210     }
IsUnknownDateToken211     bool IsUnknown(int character) {
212       return IsUnknown() && value_ == character;
213     }
214     // Factory functions.
KeywordDateToken215     static DateToken Keyword(KeywordType tag, int value, int length) {
216       return DateToken(tag, length, value);
217     }
NumberDateToken218     static DateToken Number(int value, int length) {
219       return DateToken(kNumberTag, length, value);
220     }
SymbolDateToken221     static DateToken Symbol(char symbol) {
222       return DateToken(kSymbolTag, 1, symbol);
223     }
EndOfInputDateToken224     static DateToken EndOfInput() {
225       return DateToken(kEndOfInputTag, 0, -1);
226     }
WhiteSpaceDateToken227     static DateToken WhiteSpace(int length) {
228       return DateToken(kWhiteSpaceTag, length, -1);
229     }
UnknownDateToken230     static DateToken Unknown() {
231       return DateToken(kUnknownTokenTag, 1, -1);
232     }
InvalidDateToken233     static DateToken Invalid() {
234       return DateToken(kInvalidTokenTag, 0, -1);
235     }
236 
237    private:
238     enum TagType {
239       kInvalidTokenTag = -6,
240       kUnknownTokenTag = -5,
241       kWhiteSpaceTag = -4,
242       kNumberTag = -3,
243       kSymbolTag = -2,
244       kEndOfInputTag = -1,
245       kKeywordTagStart = 0
246     };
DateTokenDateToken247     DateToken(int tag, int length, int value)
248         : tag_(tag),
249           length_(length),
250           value_(value) { }
251 
252     int tag_;
253     int length_;  // Number of characters.
254     int value_;
255   };
256 
257   template <typename Char>
258   class DateStringTokenizer {
259    public:
DateStringTokenizer(InputReader<Char> * in)260     explicit DateStringTokenizer(InputReader<Char>* in)
261         : in_(in), next_(Scan()) { }
Next()262     DateToken Next() {
263       DateToken result = next_;
264       next_ = Scan();
265       return result;
266     }
267 
Peek()268     DateToken Peek() {
269       return next_;
270     }
SkipSymbol(char symbol)271     bool SkipSymbol(char symbol) {
272       if (next_.IsSymbol(symbol)) {
273         next_ = Scan();
274         return true;
275       }
276       return false;
277     }
278 
279    private:
280     DateToken Scan();
281 
282     InputReader<Char>* in_;
283     DateToken next_;
284   };
285 
286   static int ReadMilliseconds(DateToken number);
287 
288   // KeywordTable maps names of months, time zones, am/pm to numbers.
289   class KeywordTable : public AllStatic {
290    public:
291     // Look up a word in the keyword table and return an index.
292     // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
293     // and 'len' is the word length.
294     static int Lookup(const uint32_t* pre, int len);
295     // Get the type of the keyword at index i.
GetType(int i)296     static KeywordType GetType(int i) {
297       return static_cast<KeywordType>(array[i][kTypeOffset]);
298     }
299     // Get the value of the keyword at index i.
GetValue(int i)300     static int GetValue(int i) { return array[i][kValueOffset]; }
301 
302     static const int kPrefixLength = 3;
303     static const int kTypeOffset = kPrefixLength;
304     static const int kValueOffset = kTypeOffset + 1;
305     static const int kEntrySize = kValueOffset + 1;
306     static const int8_t array[][kEntrySize];
307   };
308 
309   class TimeZoneComposer BASE_EMBEDDED {
310    public:
TimeZoneComposer()311     TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
Set(int offset_in_hours)312     void Set(int offset_in_hours) {
313       sign_ = offset_in_hours < 0 ? -1 : 1;
314       hour_ = offset_in_hours * sign_;
315       minute_ = 0;
316     }
SetSign(int sign)317     void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
SetAbsoluteHour(int hour)318     void SetAbsoluteHour(int hour) { hour_ = hour; }
SetAbsoluteMinute(int minute)319     void SetAbsoluteMinute(int minute) { minute_ = minute; }
IsExpecting(int n)320     bool IsExpecting(int n) const {
321       return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
322     }
IsUTC()323     bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
324     bool Write(FixedArray* output);
IsEmpty()325     bool IsEmpty() { return hour_ == kNone; }
326    private:
327     int sign_;
328     int hour_;
329     int minute_;
330   };
331 
332   class TimeComposer BASE_EMBEDDED {
333    public:
TimeComposer()334     TimeComposer() : index_(0), hour_offset_(kNone) {}
IsEmpty()335     bool IsEmpty() const { return index_ == 0; }
IsExpecting(int n)336     bool IsExpecting(int n) const {
337       return (index_ == 1 && IsMinute(n)) ||
338              (index_ == 2 && IsSecond(n)) ||
339              (index_ == 3 && IsMillisecond(n));
340     }
Add(int n)341     bool Add(int n) {
342       return index_ < kSize ? (comp_[index_++] = n, true) : false;
343     }
AddFinal(int n)344     bool AddFinal(int n) {
345       if (!Add(n)) return false;
346       while (index_ < kSize) comp_[index_++] = 0;
347       return true;
348     }
SetHourOffset(int n)349     void SetHourOffset(int n) { hour_offset_ = n; }
350     bool Write(FixedArray* output);
351 
IsMinute(int x)352     static bool IsMinute(int x) { return Between(x, 0, 59); }
IsHour(int x)353     static bool IsHour(int x) { return Between(x, 0, 23); }
IsSecond(int x)354     static bool IsSecond(int x) { return Between(x, 0, 59); }
355 
356    private:
IsHour12(int x)357     static bool IsHour12(int x) { return Between(x, 0, 12); }
IsMillisecond(int x)358     static bool IsMillisecond(int x) { return Between(x, 0, 999); }
359 
360     static const int kSize = 4;
361     int comp_[kSize];
362     int index_;
363     int hour_offset_;
364   };
365 
366   class DayComposer BASE_EMBEDDED {
367    public:
DayComposer()368     DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
IsEmpty()369     bool IsEmpty() const { return index_ == 0; }
Add(int n)370     bool Add(int n) {
371       if (index_ < kSize) {
372         comp_[index_] = n;
373         index_++;
374         return true;
375       }
376       return false;
377     }
SetNamedMonth(int n)378     void SetNamedMonth(int n) { named_month_ = n; }
379     bool Write(FixedArray* output);
set_iso_date()380     void set_iso_date() { is_iso_date_ = true; }
IsMonth(int x)381     static bool IsMonth(int x) { return Between(x, 1, 12); }
IsDay(int x)382     static bool IsDay(int x) { return Between(x, 1, 31); }
383 
384    private:
385     static const int kSize = 3;
386     int comp_[kSize];
387     int index_;
388     int named_month_;
389     // If set, ensures that data is always parsed in year-month-date order.
390     bool is_iso_date_;
391   };
392 
393   // Tries to parse an ES5 Date Time String. Returns the next token
394   // to continue with in the legacy date string parser. If parsing is
395   // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
396   // returns DateToken::Invalid(). Otherwise parsing continues in the
397   // legacy parser.
398   template <typename Char>
399   static DateParser::DateToken ParseES5DateTime(
400       DateStringTokenizer<Char>* scanner,
401       DayComposer* day,
402       TimeComposer* time,
403       TimeZoneComposer* tz);
404 };
405 
406 
407 } }  // namespace v8::internal
408 
409 #endif  // V8_DATEPARSER_H_
410