# DExTer : Debugging Experience Tester
# ~~~~~~   ~         ~~         ~   ~~
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Parse a DExTer command. In particular, ensure that only a very limited
subset of Python is allowed, in order to prevent the possibility of unsafe
Python code being embedded within DExTer commands.
"""

import os
import unittest
from copy import copy

from collections import defaultdict, OrderedDict

from dex.utils.Exceptions import CommandParseError

from dex.command.CommandBase import CommandBase
from dex.command.commands.DexExpectProgramState import DexExpectProgramState
from dex.command.commands.DexExpectStepKind import DexExpectStepKind
from dex.command.commands.DexExpectStepOrder import DexExpectStepOrder
from dex.command.commands.DexExpectWatchType import DexExpectWatchType
from dex.command.commands.DexExpectWatchValue import DexExpectWatchValue
from dex.command.commands.DexLabel import DexLabel
from dex.command.commands.DexLimitSteps import DexLimitSteps
from dex.command.commands.DexUnreachable import DexUnreachable
from dex.command.commands.DexWatch import DexWatch
from dex.utils import Timer
from dex.utils.Exceptions import CommandParseError, DebuggerException


def _get_valid_commands():
    """Return all top level DExTer test commands.

    Returns:
        { name (str): command (class) }
    """
    return {
        DexExpectProgramState.get_name(): DexExpectProgramState,
        DexExpectStepKind.get_name(): DexExpectStepKind,
        DexExpectStepOrder.get_name(): DexExpectStepOrder,
        DexExpectWatchType.get_name(): DexExpectWatchType,
        DexExpectWatchValue.get_name(): DexExpectWatchValue,
        DexLabel.get_name(): DexLabel,
        DexLimitSteps.get_name(): DexLimitSteps,
        DexUnreachable.get_name(): DexUnreachable,
        DexWatch.get_name(): DexWatch,
    }


def _get_command_name(command_raw: str) -> str:
    """Return the command name at the start of `command_raw`.

    The name is everything before the first opening parenthesis, with any
    trailing whitespace stripped. If there is no parenthesis the whole
    (right-stripped) string is returned.
    """
    return command_raw.split('(', 1)[0].rstrip()


def _merge_subcommands(command_name: str, valid_commands: dict) -> dict:
    """Merge valid_commands and command_name's subcommands into a new dict.

    Returns:
        { name (str): command (class) }
    """
    subcommands = valid_commands[command_name].get_subcommands()
    if subcommands:
        return {**valid_commands, **subcommands}
    return valid_commands


def _build_command(command_type, raw_text: str, path: str, lineno: int) -> CommandBase:
    """Build a command object from raw text.

    This function will call eval().

    Args:
        command_type: Command class that `raw_text` is expected to construct.
        raw_text: Source text of the command, e.g. `DexLabel('a')`.
        path: File the command was found in.
        lineno: 1-based line number of the start of the command.

    Raises:
        Any exception that eval() can raise.

    Returns:
        A dexter command object.
    """
    valid_commands = _merge_subcommands(
        command_type.get_name(), {command_type.get_name(): command_type})
    # NOTE(security): the globals dict handed to eval() holds only the command
    # and its subcommands, but eval() inserts `__builtins__` into a globals
    # dict that lacks it, so builtins stay reachable from `raw_text`. Only
    # parse trusted test sources.
    # pylint: disable=eval-used
    command = eval(raw_text, valid_commands)
    # pylint: enable=eval-used
    command.raw_text = raw_text
    command.path = path
    command.lineno = lineno
    return command


def resolve_labels(command: CommandBase, commands: dict):
    """Attempt to resolve any labels in command.

    Every DexLabel already collected in `commands` that lives in the same
    file as `command` and evaluates to one of its label arguments is handed
    to `command.resolve_label()`.

    Raises:
        SyntaxError: if `command` still reports labels afterwards.
    """
    dex_labels = commands['DexLabel']
    for command_arg in command.get_label_args():
        for dex_label in list(dex_labels.values()):
            if (os.path.samefile(dex_label.path, command.path) and
                    dex_label.eval() == command_arg):
                command.resolve_label(dex_label.get_as_pair())

    # labels for command should be resolved by this point.
    if command.has_labels():
        syntax_error = SyntaxError()
        syntax_error.filename = command.path
        syntax_error.lineno = command.lineno
        syntax_error.offset = 0
        syntax_error.msg = 'Unresolved labels' + ''.join(
            " '{}'".format(label) for label in command.get_label_args())
        raise syntax_error


def _search_line_for_cmd_start(line: str, start: int, valid_commands: dict) -> int:
    r"""Scan `line` for a string matching any key in `valid_commands`.

    Start searching from `start`.
    Commands escaped with `\` (E.g. `\DexLabel('a')`) are ignored; the search
    continues past them so a later unescaped command is still found.

    Returns:
        int: the index of the first character of the earliest matching string
             in `line` or -1 if no command is found.
    """
    earliest = -1
    for command in valid_commands:
        idx = line.find(command, start)
        # Step over escaped occurrences of this command.
        while idx > 0 and line[idx - 1] == '\\':
            idx = line.find(command, idx + 1)
        # Keep the left-most hit so dict ordering cannot hide a command that
        # appears earlier in the line.
        if idx != -1 and (earliest == -1 or idx < earliest):
            earliest = idx
    return earliest


def _search_line_for_cmd_end(line: str, start: int, paren_balance: int) -> (int, int):
    """Find the end of a command by looking for balanced parentheses.

    Args:
        line: String to scan.
        start: Index into `line` to start looking.
        paren_balance(int): paren_balance after previous call.

    Note:
        On the first call `start` should point at the opening parenthesis and
        `paren_balance` should be set to 0. Subsequent calls should pass in the
        returned `paren_balance`.

        Every '(' and ')' is counted, including any inside string literals.

    Returns:
        ( end, paren_balance )
        Where end is 1 + the index of the last char in the command or, if the
        parentheses are not balanced, the end of the line.

        paren_balance will be 0 when the parentheses are balanced.
    """
    for idx in range(start, len(line)):
        ch = line[idx]
        if ch == '(':
            paren_balance += 1
        elif ch == ')':
            paren_balance -= 1
            if paren_balance == 0:
                return (idx + 1, paren_balance)
    # Nothing (more) to scan on this line, e.g. an empty continuation line.
    return (max(start, len(line)), paren_balance)


class TextPoint:
    """A zero-based (line, char) position within a list of text lines."""

    def __init__(self, line, char):
        self.line = line
        self.char = char

    def get_lineno(self):
        """Return the 1-based line number."""
        return self.line + 1

    def get_column(self):
        """Return the 1-based column number."""
        return self.char + 1


def format_parse_err(msg: str, path: str, lines: list, point: TextPoint) -> CommandParseError:
    """Build a CommandParseError describing `msg` at `point` in `lines`.

    The error carries the file name, the offending source line (right
    stripped), its 1-based line number, the message and a caret string
    indented to `point.char`.
    """
    err = CommandParseError()
    err.filename = path
    err.src = lines[point.line].rstrip()
    err.lineno = point.get_lineno()
    err.info = msg
    err.caret = '{}<r>^</>'.format(' ' * (point.char))
    return err


def skip_horizontal_whitespace(line, point):
    """Advance `point.char` to the next char in `line` that is not ' ' or tab.

    `point` is left untouched if only spaces/tabs (or nothing) remain.
    """
    for idx, char in enumerate(line[point.char:]):
        if char not in ' \t':
            point.char += idx
            return


def _find_all_commands_in_file(path, file_lines, valid_commands):
    """Parse every command found in `file_lines`.

    Args:
        path: Name of the file the lines came from (used in errors and keys).
        file_lines: List of source lines.
        valid_commands: { name (str): command (class) } to look for.

    Raises:
        CommandParseError: on a missing open parenthesis, unbalanced
            parentheses, or a SyntaxError/TypeError while building a command.

    Returns:
        { cmd_name: { (path, TextPoint): command_obj } }
    """
    commands = defaultdict(dict)
    paren_balance = 0
    region_start = TextPoint(0, 0)
    for line_idx, line in enumerate(file_lines):
        region_start.line = line_idx
        region_start.char = 0

        # Search this line till we find no more commands.
        while True:
            # If parens are currently balanced we can look for a new command.
            if paren_balance == 0:
                region_start.char = _search_line_for_cmd_start(
                    line, region_start.char, valid_commands)
                if region_start.char == -1:
                    break  # Read next line.

                command_name = _get_command_name(line[region_start.char:])
                cmd_point = copy(region_start)
                cmd_text_list = [command_name]

                # Start searching for parens after cmd.
                region_start.char += len(command_name)
                skip_horizontal_whitespace(line, region_start)
                if region_start.char >= len(line) or line[region_start.char] != '(':
                    raise format_parse_err(
                        "Missing open parenthesis", path, file_lines, region_start)

            end, paren_balance = _search_line_for_cmd_end(
                line, region_start.char, paren_balance)
            # Add this text blob to the command.
            cmd_text_list.append(line[region_start.char:end])
            # Move parse ptr to end of line or parens.
            region_start.char = end

            # If the parens are unbalanced start reading the next line in an
            # attempt to find the end of the command.
            if paren_balance != 0:
                break  # Read next line.

            # Parens are balanced, we have a full command to evaluate.
            raw_text = "".join(cmd_text_list)
            try:
                command = _build_command(
                    valid_commands[command_name],
                    raw_text,
                    path,
                    cmd_point.get_lineno(),
                )
            except SyntaxError as e:
                # This err should point to the problem line.
                err_point = copy(cmd_point)
                # e.lineno / e.offset are 1-based positions within raw_text
                # and may be None.
                rel_line = (e.lineno or 1) - 1
                rel_char = (e.offset or 1) - 1
                err_point.line += rel_line
                if rel_line == 0:
                    # To e the command start is the absolute start, so use
                    # it as the column offset.
                    err_point.char += rel_char
                else:
                    # Continuation lines are copied from column 0, so the
                    # offset already is the column in the file.
                    err_point.char = rel_char
                raise format_parse_err(e.msg, path, file_lines, err_point)
            except TypeError as e:
                # This err should always point to the end of the command name.
                err_point = copy(cmd_point)
                err_point.char += len(command_name)
                raise format_parse_err(str(e), path, file_lines, err_point)
            else:
                resolve_labels(command, commands)
                assert (path, cmd_point) not in commands[command_name], (
                    command_name, commands[command_name])
                commands[command_name][path, cmd_point] = command

    if paren_balance != 0:
        # This err should always point to the end of the command name.
        err_point = copy(cmd_point)
        err_point.char += len(command_name)
        msg = "Unbalanced parenthesis starting here"
        raise format_parse_err(msg, path, file_lines, err_point)
    return dict(commands)


def _find_all_commands(source_files):
    """Parse every file in `source_files` and merge the commands found.

    Returns:
        { cmd_name: { (path, TextPoint): command_obj } }
    """
    commands = defaultdict(dict)
    valid_commands = _get_valid_commands()
    for source_file in source_files:
        with open(source_file) as fp:
            lines = fp.readlines()
        file_commands = _find_all_commands_in_file(source_file, lines,
                                                   valid_commands)
        for command_name in file_commands:
            commands[command_name].update(file_commands[command_name])

    return dict(commands)


def get_command_infos(source_files):
    """Parse `source_files` and group the commands found by command name.

    Raises:
        DebuggerException: if a command fails to parse.

    Returns:
        OrderedDict { cmd_name: [command_obj, ...] }
    """
    with Timer('parsing commands'):
        try:
            commands = _find_all_commands(source_files)
            command_infos = OrderedDict()
            for command_type, found in commands.items():
                for command in found.values():
                    command_infos.setdefault(command_type, []).append(command)
            return command_infos
        except CommandParseError as e:
            msg = 'parser error: <d>{}({}):</> {}\n{}\n{}\n'.format(
                e.filename, e.lineno, e.info, e.src, e.caret)
            raise DebuggerException(msg)


class TestParseCommand(unittest.TestCase):
    class MockCmd(CommandBase):
        """A mock DExTer command for testing parsing.

        Args:
            value (str): Unique name for this instance.
        """

        def __init__(self, *args):
            self.value = args[0]

        def get_name():
            return __class__.__name__

        def eval(this):
            pass

    def __init__(self, *args):
        super().__init__(*args)

        self.valid_commands = {
            TestParseCommand.MockCmd.get_name(): TestParseCommand.MockCmd
        }

    def _find_all_commands_in_lines(self, lines):
        """Use DExTer parsing methods to find all the mock commands in lines.

        Returns:
            { cmd_name: { (path, line): command_obj } }
        """
        return _find_all_commands_in_file(__file__, lines, self.valid_commands)

    def _find_all_mock_values_in_lines(self, lines):
        """Use DExTer parsing methods to find all mock command values in lines.

        Returns:
            values (list(str)): MockCmd values found in lines.
        """
        cmds = self._find_all_commands_in_lines(lines)
        mocks = cmds.get(TestParseCommand.MockCmd.get_name(), None)
        return [v.value for v in mocks.values()] if mocks else []

    def test_parse_inline(self):
        """Commands can be embedded in other text."""

        lines = [
            'MockCmd("START") Lorem ipsum dolor sit amet, consectetur\n',
            'adipiscing elit, MockCmd("EMBEDDED") sed doeiusmod tempor,\n',
            'incididunt ut labore et dolore magna aliqua.\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('START', values)
        self.assertIn('EMBEDDED', values)

    def test_parse_multi_line_comment(self):
        """Multi-line commands can embed comments."""

        lines = [
            'Lorem ipsum dolor sit amet, consectetur\n',
            'adipiscing elit, sed doeiusmod tempor,\n',
            'incididunt ut labore et MockCmd(\n',
            ' "WITH_COMMENT" # THIS IS A COMMENT\n',
            ') dolore magna aliqua. Ut enim ad minim\n',
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('WITH_COMMENT', values)

    def test_parse_empty(self):
        """Empty files are silently ignored."""

        lines = []
        values = self._find_all_mock_values_in_lines(lines)
        self.assertEqual(values, [])

    def test_parse_bad_whitespace(self):
        """Throw exception when parsing badly formed whitespace."""
        lines = [
            'MockCmd\n',
            '("XFAIL_CMD_LF_PAREN")\n',
        ]

        with self.assertRaises(CommandParseError):
            self._find_all_mock_values_in_lines(lines)

    def test_parse_good_whitespace(self):
        """Try to emulate python whitespace rules"""

        lines = [
            'MockCmd("NONE")\n',
            'MockCmd ("SPACE")\n',
            'MockCmd\t\t("TABS")\n',
            'MockCmd( "ARG_SPACE" )\n',
            'MockCmd(\t\t"ARG_TABS"\t\t)\n',
            'MockCmd(\n',
            '"CMD_PAREN_LF")\n',
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('NONE', values)
        self.assertIn('SPACE', values)
        self.assertIn('TABS', values)
        self.assertIn('ARG_SPACE', values)
        self.assertIn('ARG_TABS', values)
        self.assertIn('CMD_PAREN_LF', values)

    def test_parse_share_line(self):
        """More than one command can appear on one line."""

        lines = [
            'MockCmd("START") MockCmd("CONSECUTIVE") words '
            'MockCmd("EMBEDDED") more words\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('START', values)
        self.assertIn('CONSECUTIVE', values)
        self.assertIn('EMBEDDED', values)

    def test_parse_escaped(self):
        """Escaped commands are ignored."""

        lines = [
            'words \\MockCmd("IGNORED") words words words\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertNotIn('IGNORED', values)

    def test_parse_escaped_then_unescaped(self):
        """An escaped command does not hide a later one on the same line."""

        lines = [
            '\\MockCmd("IGNORED") MockCmd("KEPT")\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertNotIn('IGNORED', values)
        self.assertIn('KEPT', values)

    def test_parse_empty_continuation_line(self):
        """An empty line inside a multi-line command is tolerated."""

        lines = [
            'MockCmd(\n',
            '',
            '"EMPTY_LINE")\n',
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('EMPTY_LINE', values)