• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fxfa/formcalc/cxfa_fmlexer.h"
8 
9 #include <algorithm>
10 
11 #include "core/fxcrt/compiler_specific.h"
12 #include "core/fxcrt/fx_extension.h"
13 #include "core/fxcrt/stl_util.h"
14 
15 namespace {
16 
// Returns true when |c| falls inside one of the three code-point ranges this
// lexer accepts in FormCalc source: 0x09-0x0D, 0x20-0xD7FF or 0xE000-0xFFFD.
bool IsFormCalcCharacter(wchar_t c) {
  if (c >= 0x09 && c <= 0x0D) {
    return true;
  }
  if (c >= 0x20 && c <= 0xD7FF) {
    return true;
  }
  return c >= 0xE000 && c <= 0xFFFD;
}
21 
IsIdentifierCharacter(wchar_t c)22 bool IsIdentifierCharacter(wchar_t c) {
23   return FXSYS_iswalnum(c) || c == 0x005F ||  // '_'
24          c == 0x0024;                         // '$'
25 }
26 
IsInitialIdentifierCharacter(wchar_t c)27 bool IsInitialIdentifierCharacter(wchar_t c) {
28   return FXSYS_iswalpha(c) || c == 0x005F ||  // '_'
29          c == 0x0024 ||                       // '$'
30          c == 0x0021;                         // '!'
31 }
32 
// Returns true for the four characters treated as in-line whitespace:
// horizontal tab, vertical tab, form feed and space. Line feed and carriage
// return are not included here.
bool IsWhitespaceCharacter(wchar_t c) {
  switch (c) {
    case 0x0009:  // Horizontal tab
    case 0x000B:  // Vertical tab
    case 0x000C:  // Form feed
    case 0x0020:  // Space
      return true;
    default:
      return false;
  }
}
39 
// One entry of the keyword table: pairs a keyword's ASCII spelling with the
// token type that TokenizeIdentifier() returns when an identifier matches it.
40 struct XFA_FMKeyword {
41   XFA_FM_TOKEN m_type;  // Token type reported for this keyword.
42   const char* m_keyword;  // Raw, POD struct.
43 };
44 
// Keyword table consulted by TokenizeIdentifier(). It is searched front to
// back and each spelling is compared against the identifier with
// WideStringView::EqualsASCII().
45 const XFA_FMKeyword kKeyWords[] = {
46     {TOKdo, "do"},
47     {TOKkseq, "eq"},
48     {TOKksge, "ge"},
49     {TOKksgt, "gt"},
50     {TOKif, "if"},
51     {TOKin, "in"},
52     {TOKksle, "le"},
53     {TOKkslt, "lt"},
54     {TOKksne, "ne"},
55     {TOKksor, "or"},
56     {TOKnull, "null"},
57     {TOKbreak, "break"},
58     {TOKksand, "and"},
59     {TOKend, "end"},
60     {TOKeof, "eof"},
61     {TOKfor, "for"},
62     {TOKnan, "nan"},
63     {TOKksnot, "not"},
64     {TOKvar, "var"},
65     {TOKthen, "then"},
66     {TOKelse, "else"},
67     {TOKexit, "exit"},
68     {TOKdownto, "downto"},
69     {TOKreturn, "return"},
70     {TOKinfinity, "infinity"},
71     {TOKendwhile, "endwhile"},
72     {TOKforeach, "foreach"},
73     {TOKendfunc, "endfunc"},
74     {TOKelseif, "elseif"},
75     {TOKwhile, "while"},
76     {TOKendfor, "endfor"},
77     {TOKthrow, "throw"},
78     {TOKstep, "step"},
79     {TOKupto, "upto"},
80     {TOKcontinue, "continue"},
81     {TOKfunc, "func"},
82     {TOKendif, "endif"},
83 };
84 
85 #ifndef NDEBUG
// Printable token names used by Token::ToDebugString(), which indexes this
// array directly with the token's m_type.
// NOTE(review): entry i is presumably the name of the XFA_FM_TOKEN enumerator
// with value i; the enum is declared elsewhere, so confirm the order matches
// whenever either side changes.
86 constexpr auto kTokenStrings = fxcrt::ToArray<const char*>({
87     "TOKand",        "TOKlparen",     "TOKrparen",   "TOKmul",
88     "TOKplus",       "TOKcomma",      "TOKminus",    "TOKdot",
89     "TOKdiv",        "TOKlt",         "TOKassign",   "TOKgt",
90     "TOKlbracket",   "TOKrbracket",   "TOKor",       "TOKdotscream",
91     "TOKdotstar",    "TOKdotdot",     "TOKle",       "TOKne",
92     "TOKeq",         "TOKge",         "TOKdo",       "TOKkseq",
93     "TOKksge",       "TOKksgt",       "TOKif",       "TOKin",
94     "TOKksle",       "TOKkslt",       "TOKksne",     "TOKksor",
95     "TOKnull",       "TOKbreak",      "TOKksand",    "TOKend",
96     "TOKeof",        "TOKfor",        "TOKnan",      "TOKksnot",
97     "TOKvar",        "TOKthen",       "TOKelse",     "TOKexit",
98     "TOKdownto",     "TOKreturn",     "TOKinfinity", "TOKendwhile",
99     "TOKforeach",    "TOKendfunc",    "TOKelseif",   "TOKwhile",
100     "TOKendfor",     "TOKthrow",      "TOKstep",     "TOKupto",
101     "TOKcontinue",   "TOKfunc",       "TOKendif",    "TOKstar",
102     "TOKidentifier", "TOKunderscore", "TOKdollar",   "TOKexclamation",
103     "TOKcall",       "TOKstring",     "TOKnumber",   "TOKreserver",
104 });
105 #endif  // NDEBUG
106 
TokenizeIdentifier(WideStringView str)107 XFA_FM_TOKEN TokenizeIdentifier(WideStringView str) {
108   const XFA_FMKeyword* result =
109       std::find_if(std::begin(kKeyWords), std::end(kKeyWords),
110                    [str](const XFA_FMKeyword& iter) {
111                      return str.EqualsASCII(iter.m_keyword);
112                    });
113   if (result != std::end(kKeyWords) && str.EqualsASCII(result->m_keyword)) {
114     return result->m_type;
115   }
116   return TOKidentifier;
117 }
118 
119 }  // namespace
120 
// Default constructor: members keep whatever initial values the class
// declaration (not in this file) gives them. NextToken() and the Advance*()
// helpers return such a token on lexer errors.
121 CXFA_FMLexer::Token::Token() = default;
122 
// Constructs a token of type |token|; only m_type is set explicitly.
Token(XFA_FM_TOKEN token)123 CXFA_FMLexer::Token::Token(XFA_FM_TOKEN token) : m_type(token) {}
124 
// Constructs a token of type |token| whose m_string is initialized from
// |str|.
Token(XFA_FM_TOKEN token,WideStringView str)125 CXFA_FMLexer::Token::Token(XFA_FM_TOKEN token, WideStringView str)
126     : m_type(token), m_string(str) {}
127 
// Copy constructor: compiler-generated member-wise copy.
128 CXFA_FMLexer::Token::Token(const Token& that) = default;
129 
// Destructor: compiler-generated.
130 CXFA_FMLexer::Token::~Token() = default;
131 
132 #ifndef NDEBUG
ToDebugString() const133 WideString CXFA_FMLexer::Token::ToDebugString() const {
134   WideString str = WideString::FromASCII("type = ");
135   str += WideString::FromASCII(kTokenStrings[m_type]);
136   str += WideString::FromASCII(", string = ");
137   str += m_string;
138   return str;
139 }
140 #endif  // NDEBUG
141 
// Creates a lexer over |wsFormCalc|. Only the span of the view is stored in
// m_spInput; nothing here copies the characters.
// NOTE(review): the caller presumably has to keep the underlying text alive
// while the lexer is in use - confirm against m_spInput's declared type.
CXFA_FMLexer(WideStringView wsFormCalc)142 CXFA_FMLexer::CXFA_FMLexer(WideStringView wsFormCalc)
143     : m_spInput(wsFormCalc.span()) {}
144 
// Destructor: compiler-generated.
145 CXFA_FMLexer::~CXFA_FMLexer() = default;
146 
NextToken()147 CXFA_FMLexer::Token CXFA_FMLexer::NextToken() {
148   if (m_bLexerError)
149     return Token();
150 
151   while (!IsComplete() && m_spInput[m_nCursor]) {
152     if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
153       RaiseError();
154       return Token();
155     }
156 
157     switch (m_spInput[m_nCursor]) {
158       case '\n':
159         ++m_nCursor;
160         break;
161       case '\r':
162         ++m_nCursor;
163         break;
164       case ';':
165         AdvanceForComment();
166         break;
167       case '"':
168         return AdvanceForString();
169       case '0':
170       case '1':
171       case '2':
172       case '3':
173       case '4':
174       case '5':
175       case '6':
176       case '7':
177       case '8':
178       case '9':
179         return AdvanceForNumber();
180       case '=':
181         ++m_nCursor;
182         if (m_nCursor >= m_spInput.size())
183           return Token(TOKassign);
184 
185         if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
186           RaiseError();
187           return Token();
188         }
189         if (m_spInput[m_nCursor] == '=') {
190           ++m_nCursor;
191           return Token(TOKeq);
192         }
193         return Token(TOKassign);
194       case '<':
195         ++m_nCursor;
196         if (m_nCursor >= m_spInput.size())
197           return Token(TOKlt);
198 
199         if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
200           RaiseError();
201           return Token();
202         }
203         if (m_spInput[m_nCursor] == '=') {
204           ++m_nCursor;
205           return Token(TOKle);
206         }
207         if (m_spInput[m_nCursor] == '>') {
208           ++m_nCursor;
209           return Token(TOKne);
210         }
211         return Token(TOKlt);
212       case '>':
213         ++m_nCursor;
214         if (m_nCursor >= m_spInput.size())
215           return Token(TOKgt);
216 
217         if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
218           RaiseError();
219           return Token();
220         }
221         if (m_spInput[m_nCursor] == '=') {
222           ++m_nCursor;
223           return Token(TOKge);
224         }
225         return Token(TOKgt);
226       case ',':
227         ++m_nCursor;
228         return Token(TOKcomma);
229       case '(':
230         ++m_nCursor;
231         return Token(TOKlparen);
232       case ')':
233         ++m_nCursor;
234         return Token(TOKrparen);
235       case '[':
236         ++m_nCursor;
237         return Token(TOKlbracket);
238       case ']':
239         ++m_nCursor;
240         return Token(TOKrbracket);
241       case '&':
242         ++m_nCursor;
243         return Token(TOKand);
244       case '|':
245         ++m_nCursor;
246         return Token(TOKor);
247       case '+':
248         ++m_nCursor;
249         return Token(TOKplus);
250       case '-':
251         ++m_nCursor;
252         return Token(TOKminus);
253       case '*':
254         ++m_nCursor;
255         return Token(TOKmul);
256       case '/': {
257         ++m_nCursor;
258         if (m_nCursor >= m_spInput.size())
259           return Token(TOKdiv);
260 
261         if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
262           RaiseError();
263           return Token();
264         }
265         if (m_spInput[m_nCursor] != '/')
266           return Token(TOKdiv);
267 
268         AdvanceForComment();
269         break;
270       }
271       case '.':
272         ++m_nCursor;
273         if (m_nCursor >= m_spInput.size())
274           return Token(TOKdot);
275 
276         if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
277           RaiseError();
278           return Token();
279         }
280 
281         if (m_spInput[m_nCursor] == '.') {
282           ++m_nCursor;
283           return Token(TOKdotdot);
284         }
285         if (m_spInput[m_nCursor] == '*') {
286           ++m_nCursor;
287           return Token(TOKdotstar);
288         }
289         if (m_spInput[m_nCursor] == '#') {
290           ++m_nCursor;
291           return Token(TOKdotscream);
292         }
293         if (FXSYS_IsDecimalDigit(m_spInput[m_nCursor])) {
294           --m_nCursor;
295           return AdvanceForNumber();
296         }
297         return Token(TOKdot);
298       default:
299         if (IsWhitespaceCharacter(m_spInput[m_nCursor])) {
300           ++m_nCursor;
301           break;
302         }
303         if (!IsInitialIdentifierCharacter(m_spInput[m_nCursor])) {
304           RaiseError();
305           return Token();
306         }
307         return AdvanceForIdentifier();
308     }
309   }
310   return Token(TOKeof);
311 }
312 
AdvanceForNumber()313 CXFA_FMLexer::Token CXFA_FMLexer::AdvanceForNumber() {
314   // This will set end to the character after the end of the number.
315   size_t used_length = 0;
316   if (m_nCursor < m_spInput.size()) {
317     FXSYS_wcstof(WideStringView(m_spInput.subspan(m_nCursor)), &used_length);
318   }
319   size_t end = m_nCursor + used_length;
320   if (used_length == 0 ||
321       (end < m_spInput.size() && FXSYS_iswalpha(m_spInput[end]))) {
322     RaiseError();
323     return Token();
324   }
325   WideStringView str(m_spInput.subspan(m_nCursor, end - m_nCursor));
326   m_nCursor = end;
327   return Token(TOKnumber, str);
328 }
329 
AdvanceForString()330 CXFA_FMLexer::Token CXFA_FMLexer::AdvanceForString() {
331   size_t start = m_nCursor;
332   ++m_nCursor;
333   while (!IsComplete() && m_spInput[m_nCursor]) {
334     if (!IsFormCalcCharacter(m_spInput[m_nCursor]))
335       break;
336 
337     if (m_spInput[m_nCursor] == '"') {
338       // Check for escaped "s, i.e. "".
339       ++m_nCursor;
340       // If the end of the input has been reached it was not escaped.
341       if (m_nCursor >= m_spInput.size()) {
342         return Token(TOKstring, WideStringView(m_spInput.subspan(
343                                     start, m_nCursor - start)));
344       }
345       // If the next character is not a " then the end of the string has been
346       // found.
347       if (m_spInput[m_nCursor] != '"') {
348         if (!IsFormCalcCharacter(m_spInput[m_nCursor]))
349           break;
350 
351         return Token(TOKstring, WideStringView(m_spInput.subspan(
352                                     start, m_nCursor - start)));
353       }
354     }
355     ++m_nCursor;
356   }
357 
358   // Didn't find the end of the string.
359   RaiseError();
360   return Token();
361 }
362 
AdvanceForIdentifier()363 CXFA_FMLexer::Token CXFA_FMLexer::AdvanceForIdentifier() {
364   size_t start = m_nCursor;
365   ++m_nCursor;
366   while (!IsComplete() && m_spInput[m_nCursor]) {
367     if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
368       RaiseError();
369       return Token();
370     }
371     if (!IsIdentifierCharacter(m_spInput[m_nCursor]))
372       break;
373 
374     ++m_nCursor;
375   }
376 
377   WideStringView str(m_spInput.subspan(start, m_nCursor - start));
378   return Token(TokenizeIdentifier(str), str);
379 }
380 
AdvanceForComment()381 void CXFA_FMLexer::AdvanceForComment() {
382   ++m_nCursor;
383   while (!IsComplete() && m_spInput[m_nCursor]) {
384     if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
385       RaiseError();
386       return;
387     }
388     if (m_spInput[m_nCursor] == L'\r') {
389       ++m_nCursor;
390       return;
391     }
392     if (m_spInput[m_nCursor] == L'\n') {
393       ++m_nCursor;
394       return;
395     }
396     ++m_nCursor;
397   }
398 }
399