1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "xfa/fxfa/fm2js/cxfa_fmlexer.h"
8
9 #include <algorithm>
10
11 #include "core/fxcrt/fx_extension.h"
12
13 namespace {
14
// Returns true when |c| lies in one of the three code point ranges this lexer
// accepts in FormCalc source: U+0009..U+000D, U+0020..U+D7FF and
// U+E000..U+FFFD. NUL, the remaining control characters, the surrogate range
// and U+FFFE/U+FFFF are rejected.
bool IsFormCalcCharacter(wchar_t c) {
  if (c >= 0x09 && c <= 0x0D)
    return true;
  if (c >= 0x20 && c <= 0xD7FF)
    return true;
  return c >= 0xE000 && c <= 0xFFFD;
}
19
IsIdentifierCharacter(wchar_t c)20 bool IsIdentifierCharacter(wchar_t c) {
21 return FXSYS_iswalnum(c) || c == 0x005F || // '_'
22 c == 0x0024; // '$'
23 }
24
IsInitialIdentifierCharacter(wchar_t c)25 bool IsInitialIdentifierCharacter(wchar_t c) {
26 return FXSYS_iswalpha(c) || c == 0x005F || // '_'
27 c == 0x0024 || // '$'
28 c == 0x0021; // '!'
29 }
30
// Returns true for the in-line blanks the lexer skips between tokens. Line
// feed and carriage return are deliberately absent from this list;
// NextToken() handles them in cases of their own.
bool IsWhitespaceCharacter(wchar_t c) {
  switch (c) {
    case 0x0009:  // Horizontal tab
    case 0x000B:  // Vertical tab
    case 0x000C:  // Form feed
    case 0x0020:  // Space
      return true;
    default:
      return false;
  }
}
37
38 const XFA_FMKeyword keyWords[] = {
39 {TOKdo, "do"},
40 {TOKkseq, "eq"},
41 {TOKksge, "ge"},
42 {TOKksgt, "gt"},
43 {TOKif, "if"},
44 {TOKin, "in"},
45 {TOKksle, "le"},
46 {TOKkslt, "lt"},
47 {TOKksne, "ne"},
48 {TOKksor, "or"},
49 {TOKnull, "null"},
50 {TOKbreak, "break"},
51 {TOKksand, "and"},
52 {TOKend, "end"},
53 {TOKeof, "eof"},
54 {TOKfor, "for"},
55 {TOKnan, "nan"},
56 {TOKksnot, "not"},
57 {TOKvar, "var"},
58 {TOKthen, "then"},
59 {TOKelse, "else"},
60 {TOKexit, "exit"},
61 {TOKdownto, "downto"},
62 {TOKreturn, "return"},
63 {TOKinfinity, "infinity"},
64 {TOKendwhile, "endwhile"},
65 {TOKforeach, "foreach"},
66 {TOKendfunc, "endfunc"},
67 {TOKelseif, "elseif"},
68 {TOKwhile, "while"},
69 {TOKendfor, "endfor"},
70 {TOKthrow, "throw"},
71 {TOKstep, "step"},
72 {TOKupto, "upto"},
73 {TOKcontinue, "continue"},
74 {TOKfunc, "func"},
75 {TOKendif, "endif"},
76 };
77
#ifndef NDEBUG
// Printable names for token types, used only by the debug-build helper
// CXFA_FMToken::ToDebugString(), which indexes this array with the token's
// m_type. The order therefore presumably has to mirror the XFA_FM_TOKEN
// declaration -- verify against the header before adding or moving entries.
const char* const tokenStrings[] = {
    // Symbolic operators and punctuation.
    "TOKand", "TOKlparen", "TOKrparen", "TOKmul", "TOKplus", "TOKcomma",
    "TOKminus", "TOKdot", "TOKdiv", "TOKlt", "TOKassign", "TOKgt",
    "TOKlbracket", "TOKrbracket", "TOKor", "TOKdotscream", "TOKdotstar",
    "TOKdotdot", "TOKle", "TOKne", "TOKeq", "TOKge",

    // Keyword tokens, listed in the same order as the rows of keyWords.
    "TOKdo", "TOKkseq", "TOKksge", "TOKksgt", "TOKif", "TOKin", "TOKksle",
    "TOKkslt", "TOKksne", "TOKksor", "TOKnull", "TOKbreak", "TOKksand",
    "TOKend", "TOKeof", "TOKfor", "TOKnan", "TOKksnot", "TOKvar", "TOKthen",
    "TOKelse", "TOKexit", "TOKdownto", "TOKreturn", "TOKinfinity",
    "TOKendwhile", "TOKforeach", "TOKendfunc", "TOKelseif", "TOKwhile",
    "TOKendfor", "TOKthrow", "TOKstep", "TOKupto", "TOKcontinue", "TOKfunc",
    "TOKendif",

    // Everything else, ending with the TOKreserver placeholder.
    "TOKstar", "TOKidentifier", "TOKunderscore", "TOKdollar",
    "TOKexclamation", "TOKcall", "TOKstring", "TOKnumber", "TOKreserver",
};
#endif  // NDEBUG
99
TokenizeIdentifier(WideStringView str)100 XFA_FM_TOKEN TokenizeIdentifier(WideStringView str) {
101 const XFA_FMKeyword* result =
102 std::find_if(std::begin(keyWords), std::end(keyWords),
103 [str](const XFA_FMKeyword& iter) {
104 return str.EqualsASCII(iter.m_keyword);
105 });
106 if (result != std::end(keyWords) && str.EqualsASCII(result->m_keyword))
107 return result->m_type;
108 return TOKidentifier;
109 }
110
111 } // namespace
112
CXFA_FMToken(XFA_FM_TOKEN token)113 CXFA_FMToken::CXFA_FMToken(XFA_FM_TOKEN token) : m_type(token) {}
114
CXFA_FMToken()115 CXFA_FMToken::CXFA_FMToken() : CXFA_FMToken(TOKreserver) {}
116
117 CXFA_FMToken::CXFA_FMToken(const CXFA_FMToken&) = default;
118
119 CXFA_FMToken::~CXFA_FMToken() = default;
120
121 #ifndef NDEBUG
ToDebugString() const122 WideString CXFA_FMToken::ToDebugString() const {
123 WideString str = WideString::FromASCII("type = ");
124 str += WideString::FromASCII(tokenStrings[m_type]);
125 str += WideString::FromASCII(", string = ");
126 str += m_string;
127 return str;
128 }
129 #endif // NDEBUG
130
CXFA_FMLexer(WideStringView wsFormCalc)131 CXFA_FMLexer::CXFA_FMLexer(WideStringView wsFormCalc)
132 : m_spInput(wsFormCalc.span()) {}
133
134 CXFA_FMLexer::~CXFA_FMLexer() = default;
135
NextToken()136 CXFA_FMToken CXFA_FMLexer::NextToken() {
137 if (m_bLexerError)
138 return CXFA_FMToken();
139
140 while (!IsComplete() && m_spInput[m_nCursor]) {
141 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
142 RaiseError();
143 return CXFA_FMToken();
144 }
145
146 switch (m_spInput[m_nCursor]) {
147 case '\n':
148 ++m_nCursor;
149 break;
150 case '\r':
151 ++m_nCursor;
152 break;
153 case ';':
154 AdvanceForComment();
155 break;
156 case '"':
157 return AdvanceForString();
158 case '0':
159 case '1':
160 case '2':
161 case '3':
162 case '4':
163 case '5':
164 case '6':
165 case '7':
166 case '8':
167 case '9':
168 return AdvanceForNumber();
169 case '=':
170 ++m_nCursor;
171 if (m_nCursor >= m_spInput.size())
172 return CXFA_FMToken(TOKassign);
173
174 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
175 RaiseError();
176 return CXFA_FMToken();
177 }
178 if (m_spInput[m_nCursor] == '=') {
179 ++m_nCursor;
180 return CXFA_FMToken(TOKeq);
181 }
182 return CXFA_FMToken(TOKassign);
183 case '<':
184 ++m_nCursor;
185 if (m_nCursor >= m_spInput.size())
186 return CXFA_FMToken(TOKlt);
187
188 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
189 RaiseError();
190 return CXFA_FMToken();
191 }
192 if (m_spInput[m_nCursor] == '=') {
193 ++m_nCursor;
194 return CXFA_FMToken(TOKle);
195 }
196 if (m_spInput[m_nCursor] == '>') {
197 ++m_nCursor;
198 return CXFA_FMToken(TOKne);
199 }
200 return CXFA_FMToken(TOKlt);
201 case '>':
202 ++m_nCursor;
203 if (m_nCursor >= m_spInput.size())
204 return CXFA_FMToken(TOKgt);
205
206 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
207 RaiseError();
208 return CXFA_FMToken();
209 }
210 if (m_spInput[m_nCursor] == '=') {
211 ++m_nCursor;
212 return CXFA_FMToken(TOKge);
213 }
214 return CXFA_FMToken(TOKgt);
215 case ',':
216 ++m_nCursor;
217 return CXFA_FMToken(TOKcomma);
218 case '(':
219 ++m_nCursor;
220 return CXFA_FMToken(TOKlparen);
221 case ')':
222 ++m_nCursor;
223 return CXFA_FMToken(TOKrparen);
224 case '[':
225 ++m_nCursor;
226 return CXFA_FMToken(TOKlbracket);
227 case ']':
228 ++m_nCursor;
229 return CXFA_FMToken(TOKrbracket);
230 case '&':
231 ++m_nCursor;
232 return CXFA_FMToken(TOKand);
233 case '|':
234 ++m_nCursor;
235 return CXFA_FMToken(TOKor);
236 case '+':
237 ++m_nCursor;
238 return CXFA_FMToken(TOKplus);
239 case '-':
240 ++m_nCursor;
241 return CXFA_FMToken(TOKminus);
242 case '*':
243 ++m_nCursor;
244 return CXFA_FMToken(TOKmul);
245 case '/': {
246 ++m_nCursor;
247 if (m_nCursor >= m_spInput.size())
248 return CXFA_FMToken(TOKdiv);
249
250 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
251 RaiseError();
252 return CXFA_FMToken();
253 }
254 if (m_spInput[m_nCursor] != '/')
255 return CXFA_FMToken(TOKdiv);
256
257 AdvanceForComment();
258 break;
259 }
260 case '.':
261 ++m_nCursor;
262 if (m_nCursor >= m_spInput.size())
263 return CXFA_FMToken(TOKdot);
264
265 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
266 RaiseError();
267 return CXFA_FMToken();
268 }
269
270 if (m_spInput[m_nCursor] == '.') {
271 ++m_nCursor;
272 return CXFA_FMToken(TOKdotdot);
273 }
274 if (m_spInput[m_nCursor] == '*') {
275 ++m_nCursor;
276 return CXFA_FMToken(TOKdotstar);
277 }
278 if (m_spInput[m_nCursor] == '#') {
279 ++m_nCursor;
280 return CXFA_FMToken(TOKdotscream);
281 }
282 if (FXSYS_IsDecimalDigit(m_spInput[m_nCursor])) {
283 --m_nCursor;
284 return AdvanceForNumber();
285 }
286 return CXFA_FMToken(TOKdot);
287 default:
288 if (IsWhitespaceCharacter(m_spInput[m_nCursor])) {
289 ++m_nCursor;
290 break;
291 }
292 if (!IsInitialIdentifierCharacter(m_spInput[m_nCursor])) {
293 RaiseError();
294 return CXFA_FMToken();
295 }
296 return AdvanceForIdentifier();
297 }
298 }
299 return CXFA_FMToken(TOKeof);
300 }
301
AdvanceForNumber()302 CXFA_FMToken CXFA_FMLexer::AdvanceForNumber() {
303 // This will set end to the character after the end of the number.
304 int32_t used_length = 0;
305 if (m_nCursor < m_spInput.size()) {
306 FXSYS_wcstof(&m_spInput[m_nCursor], m_spInput.size() - m_nCursor,
307 &used_length);
308 }
309 size_t end = m_nCursor + used_length;
310 if (used_length == 0 ||
311 (end < m_spInput.size() && FXSYS_iswalpha(m_spInput[end]))) {
312 RaiseError();
313 return CXFA_FMToken();
314 }
315 CXFA_FMToken token(TOKnumber);
316 token.m_string =
317 WideStringView(m_spInput.subspan(m_nCursor, end - m_nCursor));
318 m_nCursor = end;
319 return token;
320 }
321
AdvanceForString()322 CXFA_FMToken CXFA_FMLexer::AdvanceForString() {
323 CXFA_FMToken token(TOKstring);
324 size_t start = m_nCursor;
325 ++m_nCursor;
326 while (!IsComplete() && m_spInput[m_nCursor]) {
327 if (!IsFormCalcCharacter(m_spInput[m_nCursor]))
328 break;
329
330 if (m_spInput[m_nCursor] == '"') {
331 // Check for escaped "s, i.e. "".
332 ++m_nCursor;
333 // If the end of the input has been reached it was not escaped.
334 if (m_nCursor >= m_spInput.size()) {
335 token.m_string =
336 WideStringView(m_spInput.subspan(start, m_nCursor - start));
337 return token;
338 }
339 // If the next character is not a " then the end of the string has been
340 // found.
341 if (m_spInput[m_nCursor] != '"') {
342 if (!IsFormCalcCharacter(m_spInput[m_nCursor]))
343 break;
344
345 token.m_string =
346 WideStringView(m_spInput.subspan(start, m_nCursor - start));
347 return token;
348 }
349 }
350 ++m_nCursor;
351 }
352
353 // Didn't find the end of the string.
354 RaiseError();
355 return CXFA_FMToken();
356 }
357
AdvanceForIdentifier()358 CXFA_FMToken CXFA_FMLexer::AdvanceForIdentifier() {
359 size_t start = m_nCursor;
360 ++m_nCursor;
361 while (!IsComplete() && m_spInput[m_nCursor]) {
362 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
363 RaiseError();
364 return CXFA_FMToken();
365 }
366 if (!IsIdentifierCharacter(m_spInput[m_nCursor]))
367 break;
368
369 ++m_nCursor;
370 }
371
372 WideStringView str =
373 WideStringView(m_spInput.subspan(start, m_nCursor - start));
374 CXFA_FMToken token(TokenizeIdentifier(str));
375 token.m_string = str;
376 return token;
377 }
378
AdvanceForComment()379 void CXFA_FMLexer::AdvanceForComment() {
380 ++m_nCursor;
381 while (!IsComplete() && m_spInput[m_nCursor]) {
382 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
383 RaiseError();
384 return;
385 }
386 if (m_spInput[m_nCursor] == L'\r') {
387 ++m_nCursor;
388 return;
389 }
390 if (m_spInput[m_nCursor] == L'\n') {
391 ++m_nCursor;
392 return;
393 }
394 ++m_nCursor;
395 }
396 }
397