• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# DExTer : Debugging Experience Tester
2# ~~~~~~   ~         ~~         ~   ~~
3#
4# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5# See https://llvm.org/LICENSE.txt for license information.
6# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7"""Parse a DExTer command. In particular, ensure that only a very limited
8subset of Python is allowed, in order to prevent the possibility of unsafe
9Python code being embedded within DExTer commands.
10"""
11
12import os
13import unittest
14from copy import copy
15
16from collections import defaultdict, OrderedDict
17
18from dex.utils.Exceptions import CommandParseError
19
20from dex.command.CommandBase import CommandBase
21from dex.command.commands.DexExpectProgramState import DexExpectProgramState
22from dex.command.commands.DexExpectStepKind import DexExpectStepKind
23from dex.command.commands.DexExpectStepOrder import DexExpectStepOrder
24from dex.command.commands.DexExpectWatchType import DexExpectWatchType
25from dex.command.commands.DexExpectWatchValue import DexExpectWatchValue
26from dex.command.commands.DexLabel import DexLabel
27from dex.command.commands.DexLimitSteps import DexLimitSteps
28from dex.command.commands.DexUnreachable import DexUnreachable
29from dex.command.commands.DexWatch import DexWatch
30from dex.utils import Timer
31from dex.utils.Exceptions import CommandParseError, DebuggerException
32
def _get_valid_commands():
    """Build the lookup table of all top level DExTer test commands.

    Each command class is registered under the name reported by its own
    `get_name()`.

    Returns:
        { name (str): command (class) }
    """
    command_classes = (
        DexExpectProgramState,
        DexExpectStepKind,
        DexExpectStepOrder,
        DexExpectWatchType,
        DexExpectWatchValue,
        DexLabel,
        DexLimitSteps,
        DexUnreachable,
        DexWatch,
    )
    return {cls.get_name(): cls for cls in command_classes}
50
51
52def _get_command_name(command_raw: str) -> str:
53    """Return command name by splitting up DExTer command contained in
54    command_raw on the first opening paranthesis and further stripping
55    any potential leading or trailing whitespace.
56    """
57    return command_raw.split('(', 1)[0].rstrip()
58
59
60def _merge_subcommands(command_name: str, valid_commands: dict) -> dict:
61    """Merge valid_commands and command_name's subcommands into a new dict.
62
63    Returns:
64        { name (str): command (class) }
65    """
66    subcommands = valid_commands[command_name].get_subcommands()
67    if subcommands:
68        return { **valid_commands, **subcommands }
69    return valid_commands
70
71
def _build_command(command_type, raw_text: str, path: str, lineno: int) -> CommandBase:
    """Build a command object from raw text.

    This function will call eval().

    Args:
        command_type: The command class named at the start of `raw_text`.
        raw_text: Full text of the command, e.g. `DexLabel('a')`.
        path: Path of the file the command was found in.
        lineno: One-based line number on which the command starts.

    Raises:
        Any exception that eval() can raise.

    Returns:
        A dexter command object.
    """
    command_name = command_type.get_name()
    # Only the command itself and its subcommands are exposed by name to the
    # evaluated text.
    eval_globals = _merge_subcommands(
        command_name, { command_name: command_type })
    # NOTE(review): eval() inserts `__builtins__` into the globals dict when
    # that key is absent, so Python builtins remain reachable from raw_text.
    # Confirm this is acceptable given the goal of only allowing a limited
    # subset of Python in commands.
    # pylint: disable=eval-used
    command = eval(raw_text, eval_globals)
    # pylint: enable=eval-used
    # Record where the command came from for later diagnostics.
    command.raw_text = raw_text
    command.path = path
    command.lineno = lineno
    return command
92
93
def resolve_labels(command: CommandBase, commands: dict):
    """Attempt to resolve any labels in command.

    Every label argument of `command` is compared against the DexLabel
    commands stored in `commands['DexLabel']` that live in the same file as
    `command`; each match is handed to `command.resolve_label()`.

    Raises:
        SyntaxError: if `command` still reports labels afterwards. The
            message lists the label args that remain.
    """
    known_labels = commands['DexLabel']
    for wanted in command.get_label_args():
        for label_cmd in tuple(known_labels.values()):
            same_file = os.path.samefile(label_cmd.path, command.path)
            if same_file and label_cmd.eval() == wanted:
                command.resolve_label(label_cmd.get_as_pair())

    # labels for command should be resolved by this point.
    if not command.has_labels():
        return

    unresolved = SyntaxError()
    unresolved.filename = command.path
    unresolved.lineno = command.lineno
    unresolved.offset = 0
    message = 'Unresolved labels'
    for label in command.get_label_args():
        message += ' \'' + label + '\''
    unresolved.msg = message
    raise unresolved
113
114
115def _search_line_for_cmd_start(line: str, start: int, valid_commands: dict) -> int:
116    """Scan `line` for a string matching any key in `valid_commands`.
117
118    Start searching from `start`.
119    Commands escaped with `\` (E.g. `\DexLabel('a')`) are ignored.
120
121    Returns:
122        int: the index of the first character of the matching string in `line`
123        or -1 if no command is found.
124    """
125    for command in valid_commands:
126        idx = line.find(command, start)
127        if idx != -1:
128            # Ignore escaped '\' commands.
129            if idx > 0 and line[idx - 1] == '\\':
130                continue
131            return idx
132    return -1
133
134
135def _search_line_for_cmd_end(line: str, start: int, paren_balance: int) -> (int, int):
136    """Find the end of a command by looking for balanced parentheses.
137
138    Args:
139        line: String to scan.
140        start: Index into `line` to start looking.
141        paren_balance(int): paren_balance after previous call.
142
143    Note:
144        On the first call `start` should point at the opening parenthesis and
145        `paren_balance` should be set to 0. Subsequent calls should pass in the
146        returned `paren_balance`.
147
148    Returns:
149        ( end,  paren_balance )
150        Where end is 1 + the index of the last char in the command or, if the
151        parentheses are not balanced, the end of the line.
152
153        paren_balance will be 0 when the parentheses are balanced.
154    """
155    for end in range(start, len(line)):
156        ch = line[end]
157        if ch == '(':
158            paren_balance += 1
159        elif ch == ')':
160            paren_balance -=1
161        if paren_balance == 0:
162            break
163    end += 1
164    return (end, paren_balance)
165
166
class TextPoint:
    """A position in a list of text lines, stored as zero-based indices."""

    def __init__(self, line, char):
        # Index of the line, and index of the character within that line.
        self.line = line
        self.char = char

    def get_lineno(self):
        """Return the line as a one-based line number."""
        return 1 + self.line

    def get_column(self):
        """Return the character position as a one-based column number."""
        return 1 + self.char
177
178
def format_parse_err(msg: str, path: str, lines: list, point: TextPoint) -> CommandParseError:
    """Create (but do not raise) a CommandParseError describing `point`.

    Args:
        msg: Text stored as the error's `info`.
        path: Stored as the error's `filename`.
        lines: The lines of the file; `lines[point.line]` with trailing
            whitespace removed becomes the error's `src`.
        point: Location of the problem. Its one-based line number becomes
            `lineno` and `point.char` spaces are placed before the caret.

    Returns:
        The populated CommandParseError.
    """
    parse_error = CommandParseError()
    parse_error.filename = path
    offending_line = lines[point.line]
    parse_error.src = offending_line.rstrip()
    parse_error.lineno = point.get_lineno()
    parse_error.info = msg
    padding = ' ' * (point.char)
    parse_error.caret = '{}<r>^</>'.format(padding)
    return parse_error
187
188
def skip_horizontal_whitespace(line, point):
    """Advance `point.char` past spaces and tabs in `line`.

    `point.char` is moved, in place, to the first character at or after its
    current position that is neither a space nor a tab. If no such character
    exists, `point` is left unchanged.
    """
    remainder = line[point.char:]
    trimmed = remainder.lstrip(' \t')
    if trimmed:
        point.char += len(remainder) - len(trimmed)
194
195
def _find_all_commands_in_file(path, file_lines, valid_commands):
    """Find and build every command embedded in `file_lines`.

    Each line is scanned for the names in `valid_commands`. The text from a
    command name up to its balancing closing parenthesis (which may be on a
    later line) is collected and passed to `_build_command`, and the labels
    of the resulting command are resolved against the commands found so far.

    Args:
        path: Path recorded on each command and used in error reports.
        file_lines: List of the lines of text to scan.
        valid_commands: { name (str): command (class) } to look for.

    Raises:
        CommandParseError: if a command name is not followed by an opening
            parenthesis, if evaluating a command raises SyntaxError or
            TypeError, or if a command's parentheses never balance.
        SyntaxError: propagated from `resolve_labels` for unresolved labels.

    Returns:
        { cmd_name: { (path, TextPoint): command_obj } }
    """
    commands = defaultdict(dict)
    paren_balance = 0
    region_start = TextPoint(0, 0)
    for line_index, line in enumerate(file_lines):
        region_start.line = line_index
        region_start.char = 0

        # Search this line till we find no more commands.
        while True:
            # If parens are currently balanced we can look for a new command.
            if paren_balance == 0:
                region_start.char = _search_line_for_cmd_start(
                    line, region_start.char, valid_commands)
                if region_start.char == -1:
                    break # Read next line.

                command_name = _get_command_name(line[region_start.char:])
                cmd_point = copy(region_start)
                cmd_text_list = [command_name]

                # Start searching for parens after cmd.
                region_start.char += len(command_name)
                skip_horizontal_whitespace(line, region_start)
                if region_start.char >= len(line) or line[region_start.char] != '(':
                    raise format_parse_err(
                        "Missing open parenthesis", path, file_lines, region_start)

            end, paren_balance = _search_line_for_cmd_end(
                line, region_start.char, paren_balance)
            # Add this text blob to the command.
            cmd_text_list.append(line[region_start.char:end])
            # Move parse ptr to end of line or parens
            region_start.char = end

            # If the parens are unbalanced start reading the next line in an
            # attempt to find the end of the command.
            if paren_balance != 0:
                break  # Read next line.

            # Parens are balanced, we have a full command to evaluate.
            raw_text = "".join(cmd_text_list)
            try:
                # NOTE(review): command_name is all the text before '(' so it
                # may be longer than the matched key, in which case this
                # lookup raises KeyError - confirm the desired handling.
                command = _build_command(
                    valid_commands[command_name],
                    raw_text,
                    path,
                    cmd_point.get_lineno(),
                )
            except SyntaxError as e:
                # This err should point to the problem line.
                err_point = copy(cmd_point)
                # To e the command start is the absolute start, so use as
                # offset. e.lineno and e.offset are positions, not indices,
                # and either may be None when the error carries no location;
                # in that case fall back to the start of the command.
                if e.lineno is not None:
                    err_point.line += e.lineno - 1
                if e.offset is not None:
                    err_point.char += e.offset - 1
                raise format_parse_err(e.msg, path, file_lines, err_point) from e
            except TypeError as e:
                # This err should always point to the end of the command name.
                err_point = copy(cmd_point)
                err_point.char += len(command_name)
                raise format_parse_err(str(e), path, file_lines, err_point) from e
            else:
                resolve_labels(command, commands)
                assert (path, cmd_point) not in commands[command_name], (
                    command_name, commands[command_name])
                commands[command_name][path, cmd_point] = command

    if paren_balance != 0:
        # The last command started was never closed.
        # This err should always point to the end of the command name.
        err_point = copy(cmd_point)
        err_point.char += len(command_name)
        msg = "Unbalanced parenthesis starting here"
        raise format_parse_err(msg, path, file_lines, err_point)
    return dict(commands)
267
def _find_all_commands(source_files):
    """Parse every file in `source_files` and pool the commands found.

    Returns:
        A dict keyed by command name; each value is the union of the
        per-file command dicts produced by `_find_all_commands_in_file`.
    """
    valid_commands = _get_valid_commands()
    merged = defaultdict(dict)
    for source_file in source_files:
        with open(source_file) as fp:
            lines = fp.readlines()
        per_file = _find_all_commands_in_file(
            source_file, lines, valid_commands)
        for name, found in per_file.items():
            merged[name].update(found)

    return dict(merged)
280
def get_command_infos(source_files):
    """Parse `source_files` and group the commands found by command name.

    The work is timed under the label 'parsing commands'.

    Raises:
        DebuggerException: carrying a formatted message when parsing fails
            with a CommandParseError.

    Returns:
        OrderedDict of { command name: [command objects] }. Names without
        any command object are omitted.
    """
    with Timer('parsing commands'):
        try:
            found = _find_all_commands(source_files)
            infos = OrderedDict()
            for name, by_location in found.items():
                for cmd in by_location.values():
                    infos.setdefault(name, []).append(cmd)
            return OrderedDict(infos)
        except CommandParseError as e:
            template = 'parser error: <d>{}({}):</> {}\n{}\n{}\n'
            msg = template.format(
                e.filename, e.lineno, e.info, e.src, e.caret)
            raise DebuggerException(msg)
296
class TestParseCommand(unittest.TestCase):
    """Tests for finding and building commands embedded in lines of text."""

    class MockCmd(CommandBase):
        """A mock DExTer command for testing parsing.

        Args:
            value (str): Unique name for this instance.
        """

        def __init__(self, *args):
            self.value = args[0]

        # Takes no instance argument, so mark it static; it is called on the
        # class itself by the parser and by these tests.
        @staticmethod
        def get_name():
            return __class__.__name__

        def eval(self):
            pass


    def __init__(self, *args):
        super().__init__(*args)

        # The parser is only told about the mock command.
        self.valid_commands = {
            TestParseCommand.MockCmd.get_name() : TestParseCommand.MockCmd
        }


    def _find_all_commands_in_lines(self, lines):
        """Use DExTer parsing methods to find all the mock commands in lines.

        Returns:
            { cmd_name: { (path, TextPoint): command_obj } }
        """
        return _find_all_commands_in_file(__file__, lines, self.valid_commands)


    def _find_all_mock_values_in_lines(self, lines):
        """Use DExTer parsing methods to find all mock command values in lines.

        Returns:
            values (list(str)): MockCmd values found in lines.
        """
        cmds = self._find_all_commands_in_lines(lines)
        mocks = cmds.get(TestParseCommand.MockCmd.get_name(), None)
        return [v.value for v in mocks.values()] if mocks else []


    def test_parse_inline(self):
        """Commands can be embedded in other text."""

        lines = [
            'MockCmd("START") Lorem ipsum dolor sit amet, consectetur\n',
            'adipiscing elit, MockCmd("EMBEDDED") sed doeiusmod tempor,\n',
            'incididunt ut labore et dolore magna aliqua.\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('START', values)
        self.assertIn('EMBEDDED', values)


    def test_parse_multi_line_comment(self):
        """Multi-line commands can embed comments."""

        lines = [
            'Lorem ipsum dolor sit amet, consectetur\n',
            'adipiscing elit, sed doeiusmod tempor,\n',
            'incididunt ut labore et MockCmd(\n',
            '    "WITH_COMMENT" # THIS IS A COMMENT\n',
            ') dolore magna aliqua. Ut enim ad minim\n',
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('WITH_COMMENT', values)

    def test_parse_empty(self):
        """Empty files are silently ignored."""

        lines = []
        values = self._find_all_mock_values_in_lines(lines)
        self.assertEqual(values, [])

    def test_parse_bad_whitespace(self):
        """Throw exception when parsing badly formed whitespace."""
        lines = [
            'MockCmd\n',
            '("XFAIL_CMD_LF_PAREN")\n',
        ]

        with self.assertRaises(CommandParseError):
            self._find_all_mock_values_in_lines(lines)

    def test_parse_good_whitespace(self):
        """Try to emulate python whitespace rules"""

        lines = [
            'MockCmd("NONE")\n',
            'MockCmd    ("SPACE")\n',
            'MockCmd\t\t("TABS")\n',
            'MockCmd(    "ARG_SPACE"    )\n',
            'MockCmd(\t\t"ARG_TABS"\t\t)\n',
            'MockCmd(\n',
            '"CMD_PAREN_LF")\n',
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('NONE', values)
        self.assertIn('SPACE', values)
        self.assertIn('TABS', values)
        self.assertIn('ARG_SPACE', values)
        self.assertIn('ARG_TABS', values)
        self.assertIn('CMD_PAREN_LF', values)


    def test_parse_share_line(self):
        """More than one command can appear on one line."""

        lines = [
            'MockCmd("START") MockCmd("CONSECUTIVE") words '
                'MockCmd("EMBEDDED") more words\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('START', values)
        self.assertIn('CONSECUTIVE', values)
        self.assertIn('EMBEDDED', values)


    def test_parse_escaped(self):
        """Escaped commands are ignored."""

        # The backslash is doubled so the literal holds a single '\' without
        # relying on an invalid escape sequence.
        lines = [
            'words \\MockCmd("IGNORED") words words words\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertNotIn('IGNORED', values)
438