1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "xfa/fxfa/fm2js/xfa_lexer.h"
8
9 #include "core/fxcrt/fx_ext.h"
10
11 namespace {
12
13 struct XFA_FMDChar {
inc__anonf21882c60111::XFA_FMDChar14 static const FX_WCHAR* inc(const FX_WCHAR*& p) {
15 ++p;
16 return p;
17 }
dec__anonf21882c60111::XFA_FMDChar18 static const FX_WCHAR* dec(const FX_WCHAR*& p) {
19 --p;
20 return p;
21 }
get__anonf21882c60111::XFA_FMDChar22 static uint16_t get(const FX_WCHAR* p) { return *p; }
isWhiteSpace__anonf21882c60111::XFA_FMDChar23 static bool isWhiteSpace(const FX_WCHAR* p) {
24 return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20;
25 }
isLineTerminator__anonf21882c60111::XFA_FMDChar26 static bool isLineTerminator(const FX_WCHAR* p) {
27 return *p == 0x0A || *p == 0x0D;
28 }
isBinary__anonf21882c60111::XFA_FMDChar29 static bool isBinary(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '1'; }
isOctal__anonf21882c60111::XFA_FMDChar30 static bool isOctal(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '7'; }
isDigital__anonf21882c60111::XFA_FMDChar31 static bool isDigital(const FX_WCHAR* p) {
32 return (*p) >= '0' && (*p) <= '9';
33 }
isHex__anonf21882c60111::XFA_FMDChar34 static bool isHex(const FX_WCHAR* p) {
35 return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') ||
36 ((*p) >= 'A' && (*p) <= 'F');
37 }
isAlpha__anonf21882c60111::XFA_FMDChar38 static bool isAlpha(const FX_WCHAR* p) {
39 return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A');
40 }
41 static bool isAvalid(const FX_WCHAR* p, bool flag = 0);
42 static bool string2number(const FX_WCHAR* s,
43 FX_DOUBLE* pValue,
44 const FX_WCHAR*& pEnd);
45 static bool isUnicodeAlpha(uint16_t ch);
46 };
47
isAvalid(const FX_WCHAR * p,bool flag)48 inline bool XFA_FMDChar::isAvalid(const FX_WCHAR* p, bool flag) {
49 if (*p == 0) {
50 return 1;
51 }
52 if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D ||
53 (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) {
54 return 1;
55 }
56 if (!flag) {
57 if (*p == 0x0B || *p == 0x0C) {
58 return 1;
59 }
60 }
61 return 0;
62 }
63
string2number(const FX_WCHAR * s,FX_DOUBLE * pValue,const FX_WCHAR * & pEnd)64 inline bool XFA_FMDChar::string2number(const FX_WCHAR* s,
65 FX_DOUBLE* pValue,
66 const FX_WCHAR*& pEnd) {
67 if (s) {
68 *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd);
69 }
70 return 0;
71 }
72
isUnicodeAlpha(uint16_t ch)73 inline bool XFA_FMDChar::isUnicodeAlpha(uint16_t ch) {
74 if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B ||
75 ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' ||
76 ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' ||
77 ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' ||
78 ch == '+' || ch == '-' || ch == '*' || ch == '/') {
79 return false;
80 }
81 return true;
82 }
83
84 const XFA_FMKeyword keyWords[] = {
85 {TOKand, 0x00000026, L"&"},
86 {TOKlparen, 0x00000028, L"("},
87 {TOKrparen, 0x00000029, L")"},
88 {TOKmul, 0x0000002a, L"*"},
89 {TOKplus, 0x0000002b, L"+"},
90 {TOKcomma, 0x0000002c, L","},
91 {TOKminus, 0x0000002d, L"-"},
92 {TOKdot, 0x0000002e, L"."},
93 {TOKdiv, 0x0000002f, L"/"},
94 {TOKlt, 0x0000003c, L"<"},
95 {TOKassign, 0x0000003d, L"="},
96 {TOKgt, 0x0000003e, L">"},
97 {TOKlbracket, 0x0000005b, L"["},
98 {TOKrbracket, 0x0000005d, L"]"},
99 {TOKor, 0x0000007c, L"|"},
100 {TOKdotscream, 0x0000ec11, L".#"},
101 {TOKdotstar, 0x0000ec18, L".*"},
102 {TOKdotdot, 0x0000ec1c, L".."},
103 {TOKle, 0x000133f9, L"<="},
104 {TOKne, 0x000133fa, L"<>"},
105 {TOKeq, 0x0001391a, L"=="},
106 {TOKge, 0x00013e3b, L">="},
107 {TOKdo, 0x00020153, L"do"},
108 {TOKkseq, 0x00020676, L"eq"},
109 {TOKksge, 0x000210ac, L"ge"},
110 {TOKksgt, 0x000210bb, L"gt"},
111 {TOKif, 0x00021aef, L"if"},
112 {TOKin, 0x00021af7, L"in"},
113 {TOKksle, 0x00022a51, L"le"},
114 {TOKkslt, 0x00022a60, L"lt"},
115 {TOKksne, 0x00023493, L"ne"},
116 {TOKksor, 0x000239c1, L"or"},
117 {TOKnull, 0x052931bb, L"null"},
118 {TOKbreak, 0x05518c25, L"break"},
119 {TOKksand, 0x09f9db33, L"and"},
120 {TOKend, 0x0a631437, L"end"},
121 {TOKeof, 0x0a63195a, L"eof"},
122 {TOKfor, 0x0a7d67a7, L"for"},
123 {TOKnan, 0x0b4f91dd, L"nan"},
124 {TOKksnot, 0x0b4fd9b1, L"not"},
125 {TOKvar, 0x0c2203e9, L"var"},
126 {TOKthen, 0x2d5738cf, L"then"},
127 {TOKelse, 0x45f65ee9, L"else"},
128 {TOKexit, 0x4731d6ba, L"exit"},
129 {TOKdownto, 0x4caadc3b, L"downto"},
130 {TOKreturn, 0x4db8bd60, L"return"},
131 {TOKinfinity, 0x5c0a010a, L"infinity"},
132 {TOKendwhile, 0x5c64bff0, L"endwhile"},
133 {TOKforeach, 0x67e31f38, L"foreach"},
134 {TOKendfunc, 0x68f984a3, L"endfunc"},
135 {TOKelseif, 0x78253218, L"elseif"},
136 {TOKwhile, 0x84229259, L"while"},
137 {TOKendfor, 0x8ab49d7e, L"endfor"},
138 {TOKthrow, 0x8db05c94, L"throw"},
139 {TOKstep, 0xa7a7887c, L"step"},
140 {TOKupto, 0xb5155328, L"upto"},
141 {TOKcontinue, 0xc0340685, L"continue"},
142 {TOKfunc, 0xcdce60ec, L"func"},
143 {TOKendif, 0xe0e8fee6, L"endif"},
144 };
145
146 const XFA_FM_TOKEN KEYWORD_START = TOKdo;
147 const XFA_FM_TOKEN KEYWORD_END = TOKendif;
148
149 } // namespace
150
XFA_FM_KeywordToString(XFA_FM_TOKEN op)151 const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) {
152 if (op < KEYWORD_START || op > KEYWORD_END)
153 return L"";
154 return keyWords[op].m_keyword;
155 }
156
CXFA_FMToken()157 CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {}
158
CXFA_FMToken(uint32_t uLineNum)159 CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum)
160 : m_type(TOKreserver), m_uLinenum(uLineNum) {}
161
CXFA_FMLexer(const CFX_WideStringC & wsFormCalc,CXFA_FMErrorInfo * pErrorInfo)162 CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc,
163 CXFA_FMErrorInfo* pErrorInfo)
164 : m_ptr(wsFormCalc.c_str()), m_uCurrentLine(1), m_pErrorInfo(pErrorInfo) {}
165
~CXFA_FMLexer()166 CXFA_FMLexer::~CXFA_FMLexer() {}
167
NextToken()168 CXFA_FMToken* CXFA_FMLexer::NextToken() {
169 m_pToken.reset(Scan());
170 return m_pToken.get();
171 }
172
Scan()173 CXFA_FMToken* CXFA_FMLexer::Scan() {
174 uint16_t ch = 0;
175 CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine);
176 if (!XFA_FMDChar::isAvalid(m_ptr)) {
177 ch = XFA_FMDChar::get(m_ptr);
178 Error(kFMErrUnsupportedChar, ch);
179 return p;
180 }
181 int iRet = 0;
182 while (1) {
183 if (!XFA_FMDChar::isAvalid(m_ptr)) {
184 ch = XFA_FMDChar::get(m_ptr);
185 Error(kFMErrUnsupportedChar, ch);
186 return p;
187 }
188 ch = XFA_FMDChar::get(m_ptr);
189 switch (ch) {
190 case 0:
191 p->m_type = TOKeof;
192 return p;
193 case 0x0A:
194 ++m_uCurrentLine;
195 p->m_uLinenum = m_uCurrentLine;
196 XFA_FMDChar::inc(m_ptr);
197 break;
198 case 0x0D:
199 XFA_FMDChar::inc(m_ptr);
200 break;
201 case ';': {
202 const FX_WCHAR* pTemp = 0;
203 Comment(m_ptr, pTemp);
204 m_ptr = pTemp;
205 } break;
206 case '"': {
207 const FX_WCHAR* pTemp = 0;
208 p->m_type = TOKstring;
209 iRet = String(p, m_ptr, pTemp);
210 m_ptr = pTemp;
211 }
212 return p;
213 case '0':
214 case '1':
215 case '2':
216 case '3':
217 case '4':
218 case '5':
219 case '6':
220 case '7':
221 case '8':
222 case '9': {
223 p->m_type = TOKnumber;
224 const FX_WCHAR* pTemp = 0;
225 iRet = Number(p, m_ptr, pTemp);
226 m_ptr = pTemp;
227 if (iRet) {
228 Error(kFMErrBadSuffixNumber);
229 return p;
230 }
231 }
232 return p;
233 case '=':
234 XFA_FMDChar::inc(m_ptr);
235 if (XFA_FMDChar::isAvalid(m_ptr)) {
236 ch = XFA_FMDChar::get(m_ptr);
237 if (ch == '=') {
238 p->m_type = TOKeq;
239 XFA_FMDChar::inc(m_ptr);
240 return p;
241 } else {
242 p->m_type = TOKassign;
243 return p;
244 }
245 } else {
246 ch = XFA_FMDChar::get(m_ptr);
247 Error(kFMErrUnsupportedChar, ch);
248 return p;
249 }
250 break;
251 case '<':
252 XFA_FMDChar::inc(m_ptr);
253 if (XFA_FMDChar::isAvalid(m_ptr)) {
254 ch = XFA_FMDChar::get(m_ptr);
255 if (ch == '=') {
256 p->m_type = TOKle;
257 XFA_FMDChar::inc(m_ptr);
258 return p;
259 } else if (ch == '>') {
260 p->m_type = TOKne;
261 XFA_FMDChar::inc(m_ptr);
262 return p;
263 } else {
264 p->m_type = TOKlt;
265 return p;
266 }
267 } else {
268 ch = XFA_FMDChar::get(m_ptr);
269 Error(kFMErrUnsupportedChar, ch);
270 return p;
271 }
272 break;
273 case '>':
274 XFA_FMDChar::inc(m_ptr);
275 if (XFA_FMDChar::isAvalid(m_ptr)) {
276 ch = XFA_FMDChar::get(m_ptr);
277 if (ch == '=') {
278 p->m_type = TOKge;
279 XFA_FMDChar::inc(m_ptr);
280 return p;
281 } else {
282 p->m_type = TOKgt;
283 return p;
284 }
285 } else {
286 ch = XFA_FMDChar::get(m_ptr);
287 Error(kFMErrUnsupportedChar, ch);
288 return p;
289 }
290 break;
291 case ',':
292 p->m_type = TOKcomma;
293 XFA_FMDChar::inc(m_ptr);
294 return p;
295 case '(':
296 p->m_type = TOKlparen;
297 XFA_FMDChar::inc(m_ptr);
298 return p;
299 case ')':
300 p->m_type = TOKrparen;
301 XFA_FMDChar::inc(m_ptr);
302 return p;
303 case '[':
304 p->m_type = TOKlbracket;
305 XFA_FMDChar::inc(m_ptr);
306 return p;
307 case ']':
308 p->m_type = TOKrbracket;
309 XFA_FMDChar::inc(m_ptr);
310 return p;
311 case '&':
312 XFA_FMDChar::inc(m_ptr);
313 p->m_type = TOKand;
314 return p;
315 case '|':
316 XFA_FMDChar::inc(m_ptr);
317 p->m_type = TOKor;
318 return p;
319 case '+':
320 XFA_FMDChar::inc(m_ptr);
321 p->m_type = TOKplus;
322 return p;
323 case '-':
324 XFA_FMDChar::inc(m_ptr);
325 p->m_type = TOKminus;
326 return p;
327 case '*':
328 XFA_FMDChar::inc(m_ptr);
329 p->m_type = TOKmul;
330 return p;
331 case '/':
332 XFA_FMDChar::inc(m_ptr);
333 if (XFA_FMDChar::isAvalid(m_ptr)) {
334 ch = XFA_FMDChar::get(m_ptr);
335 if (ch == '/') {
336 const FX_WCHAR* pTemp = 0;
337 Comment(m_ptr, pTemp);
338 m_ptr = pTemp;
339 break;
340 } else {
341 p->m_type = TOKdiv;
342 return p;
343 }
344 } else {
345 ch = XFA_FMDChar::get(m_ptr);
346 Error(kFMErrUnsupportedChar, ch);
347 return p;
348 }
349 break;
350 case '.':
351 XFA_FMDChar::inc(m_ptr);
352 if (XFA_FMDChar::isAvalid(m_ptr)) {
353 ch = XFA_FMDChar::get(m_ptr);
354 if (ch == '.') {
355 p->m_type = TOKdotdot;
356 XFA_FMDChar::inc(m_ptr);
357 return p;
358 } else if (ch == '*') {
359 p->m_type = TOKdotstar;
360 XFA_FMDChar::inc(m_ptr);
361 return p;
362 } else if (ch == '#') {
363 p->m_type = TOKdotscream;
364 XFA_FMDChar::inc(m_ptr);
365 return p;
366 } else if (ch <= '9' && ch >= '0') {
367 p->m_type = TOKnumber;
368 const FX_WCHAR* pTemp = 0;
369 XFA_FMDChar::dec(m_ptr);
370 iRet = Number(p, m_ptr, pTemp);
371 m_ptr = pTemp;
372 if (iRet) {
373 Error(kFMErrBadSuffixNumber);
374 }
375 return p;
376 } else {
377 p->m_type = TOKdot;
378 return p;
379 }
380 } else {
381 ch = XFA_FMDChar::get(m_ptr);
382 Error(kFMErrUnsupportedChar, ch);
383 return p;
384 }
385 case 0x09:
386 case 0x0B:
387 case 0x0C:
388 case 0x20:
389 XFA_FMDChar::inc(m_ptr);
390 break;
391 default: {
392 const FX_WCHAR* pTemp = 0;
393 iRet = Identifiers(p, m_ptr, pTemp);
394 m_ptr = pTemp;
395 if (iRet) {
396 return p;
397 }
398 p->m_type = IsKeyword(p->m_wstring);
399 }
400 return p;
401 }
402 }
403 }
404
Number(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)405 uint32_t CXFA_FMLexer::Number(CXFA_FMToken* t,
406 const FX_WCHAR* p,
407 const FX_WCHAR*& pEnd) {
408 FX_DOUBLE number = 0;
409 if (XFA_FMDChar::string2number(p, &number, pEnd)) {
410 return 1;
411 }
412 if (pEnd && XFA_FMDChar::isAlpha(pEnd)) {
413 return 1;
414 }
415 t->m_wstring = CFX_WideStringC(p, (pEnd - p));
416 return 0;
417 }
418
String(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)419 uint32_t CXFA_FMLexer::String(CXFA_FMToken* t,
420 const FX_WCHAR* p,
421 const FX_WCHAR*& pEnd) {
422 const FX_WCHAR* pStart = p;
423 uint16_t ch = 0;
424 XFA_FMDChar::inc(p);
425 ch = XFA_FMDChar::get(p);
426 while (ch) {
427 if (!XFA_FMDChar::isAvalid(p)) {
428 ch = XFA_FMDChar::get(p);
429 pEnd = p;
430 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
431 Error(kFMErrUnsupportedChar, ch);
432 return 1;
433 }
434 if (ch == '"') {
435 XFA_FMDChar::inc(p);
436 if (!XFA_FMDChar::isAvalid(p)) {
437 ch = XFA_FMDChar::get(p);
438 pEnd = p;
439 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
440 Error(kFMErrUnsupportedChar, ch);
441 return 1;
442 }
443 ch = XFA_FMDChar::get(p);
444 if (ch == '"') {
445 goto NEXT;
446 } else {
447 break;
448 }
449 }
450 NEXT:
451 XFA_FMDChar::inc(p);
452 ch = XFA_FMDChar::get(p);
453 }
454 pEnd = p;
455 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
456 return 0;
457 }
458
Identifiers(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)459 uint32_t CXFA_FMLexer::Identifiers(CXFA_FMToken* t,
460 const FX_WCHAR* p,
461 const FX_WCHAR*& pEnd) {
462 const FX_WCHAR* pStart = p;
463 uint16_t ch = 0;
464 ch = XFA_FMDChar::get(p);
465 XFA_FMDChar::inc(p);
466 if (!XFA_FMDChar::isAvalid(p)) {
467 pEnd = p;
468 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
469 Error(kFMErrUnsupportedChar, ch);
470 return 1;
471 }
472 ch = XFA_FMDChar::get(p);
473 while (ch) {
474 if (!XFA_FMDChar::isAvalid(p)) {
475 pEnd = p;
476 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
477 Error(kFMErrUnsupportedChar, ch);
478 return 1;
479 }
480 ch = XFA_FMDChar::get(p);
481 if (XFA_FMDChar::isUnicodeAlpha(ch)) {
482 XFA_FMDChar::inc(p);
483 } else {
484 pEnd = p;
485 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
486 return 0;
487 }
488 }
489 pEnd = p;
490 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
491 return 0;
492 }
493
Comment(const FX_WCHAR * p,const FX_WCHAR * & pEnd)494 void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) {
495 unsigned ch = 0;
496 XFA_FMDChar::inc(p);
497 ch = XFA_FMDChar::get(p);
498 while (ch) {
499 if (ch == 0x0D) {
500 XFA_FMDChar::inc(p);
501 pEnd = p;
502 return;
503 }
504 if (ch == 0x0A) {
505 ++m_uCurrentLine;
506 XFA_FMDChar::inc(p);
507 pEnd = p;
508 return;
509 }
510 XFA_FMDChar::inc(p);
511 ch = XFA_FMDChar::get(p);
512 }
513 pEnd = p;
514 }
515
IsKeyword(const CFX_WideStringC & str)516 XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) {
517 uint32_t uHash = FX_HashCode_GetW(str, true);
518 int32_t iStart = KEYWORD_START;
519 int32_t iEnd = KEYWORD_END;
520 do {
521 int32_t iMid = (iStart + iEnd) / 2;
522 XFA_FMKeyword keyword = keyWords[iMid];
523 if (uHash == keyword.m_uHash)
524 return keyword.m_type;
525 if (uHash < keyword.m_uHash)
526 iEnd = iMid - 1;
527 else
528 iStart = iMid + 1;
529 } while (iStart <= iEnd);
530 return TOKidentifier;
531 }
532
Error(const FX_WCHAR * msg,...)533 void CXFA_FMLexer::Error(const FX_WCHAR* msg, ...) {
534 m_pErrorInfo->linenum = m_uCurrentLine;
535 va_list ap;
536 va_start(ap, msg);
537 m_pErrorInfo->message.FormatV(msg, ap);
538 va_end(ap);
539 }
540
HasError() const541 bool CXFA_FMLexer::HasError() const {
542 if (m_pErrorInfo->message.IsEmpty()) {
543 return false;
544 }
545 return true;
546 }
547