#!/usr/bin/env python3

#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""This module contains the unit tests to check the Lexer class."""

import sys
import unittest

from blueprint import Lexer, LexerError, Token


#------------------------------------------------------------------------------
# Python 2 compatibility
#------------------------------------------------------------------------------

if sys.version_info >= (3,):
    py3_str = str  # pylint: disable=invalid-name
else:
    def py3_str(string):
        """Convert a string into a utf-8 encoded string."""
        return unicode(string).encode('utf-8')


#------------------------------------------------------------------------------
# LexerError
#------------------------------------------------------------------------------

class LexerErrorTest(unittest.TestCase):
    """Unit tests for LexerError class."""

    def test_lexer_error(self):
        """Test LexerError __init__(), __str__(), line, column, and message."""

        # Each case: (buffer, offset, expected line, expected column).
        cases = [
            ('a %', 2, 1, 3),
            ('a\nb\ncde %', 8, 3, 5),
        ]
        for buf, offset, exp_line, exp_column in cases:
            exc = LexerError(buf, offset, 'unexpected character')
            self.assertEqual(exc.line, exp_line)
            self.assertEqual(exc.column, exp_column)
            self.assertEqual(exc.message, 'unexpected character')
            self.assertEqual(
                str(exc),
                'LexerError: {}:{}: unexpected character'.format(
                    exp_line, exp_column))


    def test_hierarchy(self):
        """Test the hierarchy of LexerError."""
        # LexerError must be catchable as a ValueError.
        with self.assertRaises(ValueError):
            raise LexerError('a', 0, 'error')


class LexComputeLineColumn(unittest.TestCase):
    """Unit tests for Lexer.compute_line_column() method."""

    def test_compute_line_column(self):
        """Test the line and column computation."""

        buf = 'ab\ncde\nfg\n'

        # Each case: (offset into buf, expected line, expected column).
        # Lines and columns are both 1-based.
        cases = [
            # Line 1
            (0, 1, 1),
            (1, 1, 2),
            (2, 1, 3),
            # Line 2
            (3, 2, 1),
            (4, 2, 2),
            (5, 2, 3),
            (6, 2, 4),
            # Line 3
            (7, 3, 1),
            (8, 3, 2),
            (9, 3, 3),
            # Line 4 (empty line)
            (10, 4, 1),
        ]

        for offset, exp_line, exp_column in cases:
            line, column = Lexer.compute_line_column(buf, offset)
            self.assertEqual(line, exp_line)
            self.assertEqual(column, exp_column)


#------------------------------------------------------------------------------
# Lexer.lex_string()
#------------------------------------------------------------------------------

class LexStringTest(unittest.TestCase):
    """Unit tests for the Lexer.lex_string() method."""

    def test_raw_string_lit(self):
        """Test whether Lexer.lex_string() can tokenize raw string literal."""

        # A raw string literal is delimited by backquotes.
        end, lit = Lexer.lex_string('`a`', 0)
        self.assertEqual(end, 3)
        self.assertEqual(lit, 'a')

        # Raw string literals may span multiple lines.
        end, lit = Lexer.lex_string('`a\nb`', 0)
        self.assertEqual(end, 5)
        self.assertEqual(lit, 'a\nb')

        # Lexing starts at the given offset (3 = the second literal).
        end, lit = Lexer.lex_string('"a""b"', 3)
        self.assertEqual(end, 6)
        self.assertEqual(lit, 'b')

        # An unterminated raw string literal raises LexerError.
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string('`a', 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 3)

        # An interpreted string literal must not contain a bare newline.
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string('"a\nb"', 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 3)


    def test_interpreted_string_literal(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal."""

        end, lit = Lexer.lex_string('"a"', 0)
        self.assertEqual(end, 3)
        self.assertEqual(lit, 'a')

        # A plain 'n' (not an escape sequence) is passed through unchanged.
        end, lit = Lexer.lex_string('"n"', 0)
        self.assertEqual(end, 3)
        self.assertEqual(lit, 'n')

        # A backslash at the end of input is an incomplete escape sequence.
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string('"\\', 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 2)


    def test_literal_escape_char(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a escaped character."""

        end, lit = Lexer.lex_string('"\\a"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\a')

        end, lit = Lexer.lex_string('"\\b"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\b')

        end, lit = Lexer.lex_string('"\\f"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\f')

        end, lit = Lexer.lex_string('"\\n"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\n')

        end, lit = Lexer.lex_string('"\\r"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\r')

        end, lit = Lexer.lex_string('"\\t"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\t')

        end, lit = Lexer.lex_string('"\\v"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\v')

        end, lit = Lexer.lex_string('"\\\\"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\\')

        end, lit = Lexer.lex_string('"\\\'"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\'')

        end, lit = Lexer.lex_string('"\\\""', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\"')

        # '?' is not a recognized escape character.
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string('"\\?"', 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 2)


    def test_literal_escape_octal(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with an octal escape sequence."""

        end, lit = Lexer.lex_string('"\\000"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\0')

        end, lit = Lexer.lex_string('"\\377"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\377')

        # Truncated or malformed octal escapes (fewer than 3 octal digits,
        # or a non-octal digit such as '9') must raise LexerError.
        tests = [
            '"\\0',
            '"\\0" ',
            '"\\09" ',
            '"\\009"',
        ]

        for test in tests:
            with self.assertRaises(LexerError) as ctx:
                Lexer.lex_string(test, 0)
            self.assertEqual(ctx.exception.line, 1)
            self.assertEqual(ctx.exception.column, 2)


    def test_literal_escape_hex(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a hexadecimal escape sequence."""

        end, lit = Lexer.lex_string('"\\x00"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\0')

        end, lit = Lexer.lex_string('"\\xff"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\xff')

        # Truncated or malformed hex escapes (fewer than 2 hex digits, or a
        # non-hex digit 'g' at either digit position) must raise LexerError.
        tests = [
            '"\\x',
            '"\\x" ',
            '"\\x0" ',
            '"\\xg" ',
            '"\\x0g"',
        ]

        for test in tests:
            with self.assertRaises(LexerError) as ctx:
                Lexer.lex_string(test, 0)
            self.assertEqual(ctx.exception.line, 1)
            self.assertEqual(ctx.exception.column, 2)


    def test_literal_escape_little_u(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a little u escape sequence."""

        end, lit = Lexer.lex_string('"\\u0000"', 0)
        self.assertEqual(end, 8)
        self.assertEqual(lit, '\0')

        end, lit = Lexer.lex_string('"\\uffff"', 0)
        self.assertEqual(end, 8)
        self.assertEqual(lit, py3_str(u'\uffff'))

        # Truncated escapes, and a non-hex digit 'g' at each of the four
        # hex-digit positions, must raise LexerError.
        tests = [
            '"\\u',
            '"\\u" ',
            '"\\u0" ',
            '"\\ug" ',
            '"\\u0g" ',
            '"\\u00g" ',
            '"\\u000g"',
        ]

        for test in tests:
            with self.assertRaises(LexerError) as ctx:
                Lexer.lex_string(test, 0)
            self.assertEqual(ctx.exception.line, 1)
            self.assertEqual(ctx.exception.column, 2)


    def test_literal_escape_big_u(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a big u escape sequence."""

        end, lit = Lexer.lex_string('"\\U00000000"', 0)
        self.assertEqual(end, 12)
        self.assertEqual(lit, '\0')

        end, lit = Lexer.lex_string('"\\U0001ffff"', 0)
        self.assertEqual(end, 12)
        self.assertEqual(lit, py3_str(u'\U0001ffff'))

        # Truncated escapes, and a non-hex digit 'g' at each of the eight
        # hex-digit positions, must raise LexerError.
        # NOTE: the original list contained '"\\U000g" ' twice; the duplicate
        # has been removed so each malformed position is tested exactly once.
        tests = [
            '"\\U',
            '"\\U" ',
            '"\\U0" ',
            '"\\Ug" ',
            '"\\U0g" ',
            '"\\U00g" ',
            '"\\U000g" ',
            '"\\U0000g" ',
            '"\\U00000g" ',
            '"\\U000000g" ',
            '"\\U0000000g"',
        ]

        for test in tests:
            with self.assertRaises(LexerError) as ctx:
                Lexer.lex_string(test, 0)
            self.assertEqual(ctx.exception.line, 1)
            self.assertEqual(ctx.exception.column, 2)


#------------------------------------------------------------------------------
# Lexer.lex()
#------------------------------------------------------------------------------

class LexTest(unittest.TestCase):
    """Unit tests for the Lexer.lex() method."""

    def test_lex_char(self):
        """Test whether Lexer.lex() can lex a character."""

        # Each single-character input lexes to one token with no literal.
        cases = [
            ('(', Token.LPAREN),
            (')', Token.RPAREN),
            ('[', Token.LBRACKET),
            (']', Token.RBRACKET),
            ('{', Token.LBRACE),
            ('}', Token.RBRACE),
            (':', Token.COLON),
            ('=', Token.ASSIGN),
            ('+', Token.PLUS),
            (',', Token.COMMA),
        ]
        for buf, exp_token in cases:
            token, end, lit = Lexer.lex(buf, 0)
            self.assertEqual(token, exp_token)
            self.assertEqual(end, 1)
            self.assertEqual(lit, None)


    def test_lex_assign_plus(self):
        """Test whether Lexer.lex() can lex `+=` without problems."""

        token, end, lit = Lexer.lex('+=', 0)
        self.assertEqual(token, Token.ASSIGNPLUS)
        self.assertEqual(end, 2)
        self.assertEqual(lit, None)


    def test_lex_space(self):
        """Test whether Lexer.lex() can lex whitespaces."""

        # Consecutive whitespace characters collapse into one SPACE token.
        cases = [
            (' ', 1),
            ('\t', 1),
            ('\r', 1),
            ('\n', 1),
            ('\n \r\t\n', 5),
        ]
        for buf, exp_end in cases:
            token, end, lit = Lexer.lex(buf, 0)
            self.assertEqual(token, Token.SPACE)
            self.assertEqual(end, exp_end)
            self.assertEqual(lit, None)


    def test_lex_comment(self):
        """Test whether Lexer.lex() can lex comments."""

        # Line comments end at (but do not include) the newline; block
        # comments end after the closing `*/`, including any run of stars.
        cases = [
            ('// abcd', 7),
            ('// abcd\nnext', 7),
            ('/*a\nb*/', 7),
            ('/*a\n *b*/', 9),
            ('/*a**b*/', 8),
            ('/*a***b*/', 9),
            ('/**/', 4),
            ('/***/', 5),
            ('/**a*/', 6),
            ('/*a**/', 6),
            ('/***a*/', 7),
            ('/*a***/', 7),
        ]
        for buf, exp_end in cases:
            token, end, lit = Lexer.lex(buf, 0)
            self.assertEqual(token, Token.COMMENT)
            self.assertEqual(end, exp_end)
            self.assertEqual(lit, None)


    def test_lex_string(self):
        """Test whether Lexer.lex() can lex a string."""

        cases = [
            ('"a"', 3, 'a'),
            ('`a\nb`', 5, 'a\nb'),
        ]
        for buf, exp_end, exp_lit in cases:
            token, end, lit = Lexer.lex(buf, 0)
            self.assertEqual(token, Token.STRING)
            self.assertEqual(end, exp_end)
            self.assertEqual(lit, exp_lit)


    def test_lex_ident(self):
        """Test whether Lexer.lex() can lex an identifier."""

        token, end, lit = Lexer.lex('ident', 0)
        self.assertEqual(token, Token.IDENT)
        self.assertEqual(end, 5)
        self.assertEqual(lit, 'ident')


    def test_lex_offset(self):
        """Test the offset argument of Lexer.lex()."""

        # Lex the buffer token by token, feeding each end offset back in as
        # the offset of the next call.
        buf = 'a "b"'
        offset = 0
        expected = [
            (Token.IDENT, 1, 'a'),
            (Token.SPACE, 2, None),
            (Token.STRING, 5, 'b'),
        ]
        for exp_token, exp_end, exp_lit in expected:
            token, offset, lit = Lexer.lex(buf, offset)
            self.assertEqual(token, exp_token)
            self.assertEqual(offset, exp_end)
            self.assertEqual(lit, exp_lit)


#------------------------------------------------------------------------------
# Lexer class test
#------------------------------------------------------------------------------

class LexerTest(unittest.TestCase):
    """Unit tests for the Lexer class."""

    def test_lexer(self):
        """Test token, start, end, literal, and consume()."""

        lexer = Lexer('a b //a\n "c"', 0)

        # Each step: (start, end, token, literal) before consuming that token.
        # The `//a` comment and the whitespace are skipped between tokens.
        steps = [
            (0, 1, Token.IDENT, 'a'),
            (2, 3, Token.IDENT, 'b'),
            (9, 12, Token.STRING, 'c'),
        ]
        for exp_start, exp_end, exp_token, exp_literal in steps:
            self.assertEqual(lexer.start, exp_start)
            self.assertEqual(lexer.end, exp_end)
            self.assertEqual(lexer.token, exp_token)
            self.assertEqual(lexer.literal, exp_literal)
            lexer.consume(exp_token)

        # After the last token, the lexer reports EOF at the buffer end.
        self.assertEqual(lexer.start, 12)
        self.assertEqual(lexer.end, 12)
        self.assertEqual(lexer.token, Token.EOF)
        self.assertEqual(lexer.literal, None)


    def test_lexer_offset(self):
        """Test the offset argument of Lexer.__init__()."""

        # Starting at offset 2 skips the leading `a ` entirely.
        lexer = Lexer('a b', 2)

        self.assertEqual(lexer.start, 2)
        self.assertEqual(lexer.end, 3)
        self.assertEqual(lexer.token, Token.IDENT)
        self.assertEqual(lexer.literal, 'b')
        lexer.consume(Token.IDENT)

        self.assertEqual(lexer.start, 3)
        self.assertEqual(lexer.end, 3)
        self.assertEqual(lexer.token, Token.EOF)
        self.assertEqual(lexer.literal, None)
        lexer.consume(Token.EOF)


    def test_lexer_path(self):
        """Test the path attribute of the Lexer object."""
        lexer = Lexer('content', path='test_path')
        self.assertEqual(lexer.path, 'test_path')


if __name__ == '__main__':
    unittest.main()