• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from __future__ import absolute_import
2import difflib
3import errno
4import functools
5import io
6import itertools
7import getopt
8import os, signal, subprocess, sys
9import re
10import stat
11import platform
12import shutil
13import tempfile
14import threading
15
16import io
17try:
18    from StringIO import StringIO
19except ImportError:
20    from io import StringIO
21
22from lit.ShCommands import GlobItem
23import lit.ShUtil as ShUtil
24import lit.Test as Test
25import lit.util
26from lit.util import to_bytes, to_string
27from lit.BooleanExpression import BooleanExpression
28
class InternalShellError(Exception):
    """Error raised by the internal shell while handling a command.

    Attributes:
        command: the command object the error is associated with.
        message: human readable description of the problem.
    """

    def __init__(self, command, message):
        self.command, self.message = command, message
33
kIsWindows = platform.system() == 'Windows'

# Don't use close_fds on Windows.
kUseCloseFDs = not kIsWindows

# Use temporary files to replace /dev/null on Windows.
kAvoidDevNull = kIsWindows
kDevNull = "/dev/null"

# A regex that matches %dbg(ARG), which lit inserts at the beginning of each
# run command pipeline such that ARG specifies the pipeline's source line
# number.  lit later expands each %dbg(ARG) to a command that behaves as a null
# command in the target shell so that the line number is seen in lit's verbose
# mode.
#
# This regex captures ARG.  ARG must not contain a right parenthesis, which
# terminates %dbg.  ARG must not contain quotes, in which ARG might be enclosed
# during expansion.
#
# The backslashes are escaped explicitly: '\(' is not a valid string escape
# sequence and triggers a warning on recent Python versions.  The resulting
# string value is unchanged.
kPdbgRegex = '%dbg\\(([^)\'"]*)\\)'
53
class ShellEnvironment(object):

    """Mutable state of the internal shell.

    Holds the current working directory (``cwd``) and a private copy of the
    environment variable mapping (``env``) handed to the constructor.
    """

    def __init__(self, cwd, env):
        # Copy the mapping so later modifications stay local to this object.
        self.env = dict(env)
        self.cwd = cwd
64
class TimeoutHelper(object):
    """
        Helper used to manage enforcing a timeout in
        _executeShCmd(). It is passed through recursive calls
        to collect processes that have been executed so that when
        the timeout happens they can be killed.

        A timeout of 0 (or less) disables the helper: every method then
        becomes a no-op and timeoutReached() stays False.
    """
    def __init__(self, timeout):
        self.timeout = timeout
        self._procs = []
        self._timeoutReached = False
        self._doneKillPass = False
        # This lock protects concurrent access to _procs and _doneKillPass.
        # It is created eagerly so that addProcess() and _kill() are usable
        # even if startTimer() has not been called yet.
        self._lock = threading.Lock()
        # Created by startTimer(); None until then.
        self._timer = None

    def cancel(self):
        """Stop the timer, if one is running."""
        if not self.active():
            return
        # startTimer() may never have been called; nothing to cancel then.
        if self._timer is not None:
            self._timer.cancel()

    def active(self):
        """Return True when a positive timeout was requested."""
        return self.timeout > 0

    def addProcess(self, proc):
        """Register ``proc`` so it is killed if/when the timeout fires."""
        if not self.active():
            return
        needToRunKill = False
        with self._lock:
            self._procs.append(proc)
            # Avoid re-entering the lock by finding out if kill needs to be run
            # again here but call it if necessary once we have left the lock.
            # We could use a reentrant lock here instead but this code seems
            # clearer to me.
            needToRunKill = self._doneKillPass

        # The initial call to _kill() from the timer thread already happened so
        # we need to call it again from this thread, otherwise this process
        # will be left to run even though the timeout was already hit
        if needToRunKill:
            assert self.timeoutReached()
            self._kill()

    def startTimer(self):
        """Start the timer thread that fires after ``timeout`` seconds."""
        if not self.active():
            return

        self._timer = threading.Timer(self.timeout, self._handleTimeoutReached)
        self._timer.start()

    def _handleTimeoutReached(self):
        # Runs on the timer thread once the timeout expires.
        self._timeoutReached = True
        self._kill()

    def timeoutReached(self):
        """Return True once the timer has fired."""
        return self._timeoutReached

    def _kill(self):
        """
            This method may be called multiple times as we might get unlucky
            and be in the middle of creating a new process in _executeShCmd()
            which won't yet be in ``self._procs``. By locking here and in
            addProcess() we should be able to kill processes launched after
            the initial call to _kill()
        """
        with self._lock:
            for p in self._procs:
                lit.util.killProcessAndChildren(p.pid)
            # Empty the list and note that we've done a pass over the list
            self._procs = [] # Python2 doesn't have list.clear()
            self._doneKillPass = True
140
class ShellCommandResult(object):
    """Captures the result of an individual command.

    Attributes:
        command: the command that was run.
        stdout, stderr: the captured output streams.
        exitCode: the command's exit status.
        timeoutReached: whether the timeout was hit while running it.
        outputFiles: a fresh list built from the ``outputFiles`` argument.
    """

    # An immutable empty tuple is used as the default instead of a shared
    # mutable list; the value is copied into a new list either way.
    def __init__(self, command, stdout, stderr, exitCode, timeoutReached,
                 outputFiles=()):
        self.command = command
        self.stdout = stdout
        self.stderr = stderr
        self.exitCode = exitCode
        self.timeoutReached = timeoutReached
        self.outputFiles = list(outputFiles)
152
def executeShCmd(cmd, shenv, results, timeout=0):
    """
        Wrapper around _executeShCmd that handles
        timeout

        Returns a ``(finalExitCode, timeoutInfo)`` tuple where ``timeoutInfo``
        is None unless the timeout was reached, in which case it is a
        message describing the timeout.
    """
    # Use the helper even when no timeout is required to make
    # other code simpler (i.e. avoid bunch of ``!= None`` checks)
    timeoutHelper = TimeoutHelper(timeout)
    if timeout > 0:
        timeoutHelper.startTimer()
    try:
        finalExitCode = _executeShCmd(cmd, shenv, results, timeoutHelper)
    finally:
        # Always stop the timer, even if the command raised, so that no timer
        # thread is left behind to fire later.
        timeoutHelper.cancel()
    timeoutInfo = None
    if timeoutHelper.timeoutReached():
        timeoutInfo = 'Reached timeout of {} seconds'.format(timeout)

    return (finalExitCode, timeoutInfo)
170
def expand_glob(arg, cwd):
    """Return the list of values a single argument expands to.

    A GlobItem is resolved relative to ``cwd`` and its matches are returned
    sorted; any other argument is returned unchanged in a one-element list.
    """
    if not isinstance(arg, GlobItem):
        return [arg]
    matches = arg.resolve(cwd)
    return sorted(matches)
175
def expand_glob_expressions(args, cwd):
    """Expand glob items in every argument except the first.

    ``args[0]`` is kept as is; each following argument is replaced by the
    result of expand_glob() for it.
    """
    expanded = [args[0]]
    for item in args[1:]:
        expanded += expand_glob(item, cwd)
    return expanded
181
def quote_windows_command(seq):
    """
    Reimplement Python's private subprocess.list2cmdline for MSys compatibility

    Based on CPython implementation here:
      https://hg.python.org/cpython/file/849826a900d2/Lib/subprocess.py#l422

    Some core util distributions (MSys) don't tokenize command line arguments
    the same way that MSVC CRT does. Lit rolls its own quoting logic similar to
    the stock CPython logic to paper over these quoting and tokenization rule
    differences.

    We use the same algorithm from MSDN as CPython
    (http://msdn.microsoft.com/en-us/library/17w5ykft.aspx), but we treat more
    characters as needing quoting, such as double quotes themselves.

    Returns the arguments of ``seq`` quoted and joined with single spaces.
    """
    backslash = '\\'

    def quote_arg(arg):
        # This logic differs from upstream list2cmdline: arguments holding a
        # double quote are wrapped in quotes too, as are empty arguments.
        wrap = any(ch in arg for ch in (' ', '\t', '"')) or not arg
        pieces = ['"'] if wrap else []

        # Number of backslashes seen but not yet emitted; whether they need
        # doubling depends on what follows them.
        pending = 0
        for ch in arg:
            if ch == backslash:
                pending += 1
                continue
            if ch == '"':
                # Backslashes in front of a quote are doubled and the quote
                # itself is escaped.
                pieces.append(backslash * (pending * 2) + '\\"')
            else:
                # Backslashes in front of a normal char are emitted verbatim.
                pieces.append(backslash * pending + ch)
            pending = 0

        # Trailing backslashes are emitted once ...
        pieces.append(backslash * pending)
        if wrap:
            # ... and once more when followed by the closing quote.
            pieces.append(backslash * pending + '"')
        return ''.join(pieces)

    return ' '.join(quote_arg(arg) for arg in seq)
237
238# cmd is export or env
def updateEnv(env, cmd):
    """Apply the leading arguments of an export/env command to ``env.env``.

    Arguments after the command name are processed in order: ``-u NAME``
    removes NAME from the environment and ``KEY=VALUE`` sets KEY. Processing
    stops at the first argument without an '='. ``cmd.args`` is then replaced
    by the arguments starting at that one.

    NOTE: when every argument is consumed (no stopping argument is found),
    the last processed argument is left in ``cmd.args``.
    """
    variables = env.env
    # Keeps its initial value only when there are no arguments to look at.
    position = 1
    expect_unset_name = False
    for position, token in enumerate(cmd.args[1:]):
        if token == '-u':
            # Support for the -u flag (unsetting) for env command
            # e.g., env -u FOO -u BAR will remove both FOO and BAR
            # from the environment.
            expect_unset_name = True
        elif expect_unset_name:
            expect_unset_name = False
            variables.pop(token, None)
        else:
            # Split the token into KEY=VALUE; no '=' ends the processing.
            name, separator, value = token.partition('=')
            if not separator:
                break
            variables[name] = value
    cmd.args = cmd.args[position + 1:]
262
def executeBuiltinEcho(cmd, shenv):
    """Interpret a redirected echo command

    Writes the echo arguments to the redirected stdout file, or buffers them
    when stdout is not redirected. Only the -e and -n flags are interpreted.

    Returns the buffered output when stdout is not redirected, otherwise "".
    Raises InternalShellError when stdin or stderr is redirected.
    """
    opened_files = []
    try:
        stdin, stdout, stderr = processRedirects(cmd, subprocess.PIPE, shenv,
                                                 opened_files)
        if stdin != subprocess.PIPE or stderr != subprocess.PIPE:
            raise InternalShellError(
                    cmd, "stdin and stderr redirects not supported for echo")

        # Some tests have un-redirected echo commands to help debug test
        # failures. Buffer our output and return it to the caller.
        is_redirected = True
        encode = lambda x : x
        if stdout == subprocess.PIPE:
            is_redirected = False
            stdout = StringIO()
        elif kIsWindows:
            # Reopen stdout in binary mode to avoid CRLF translation. The
            # versions of echo we are replacing on Windows all emit plain LF,
            # and the LLVM tests now depend on this.
            # When we open as binary, however, this also means that we have to
            # write 'bytes' objects to stdout instead of 'str' objects.
            encode = lit.util.to_bytes
            stdout = open(stdout.name, stdout.mode + 'b')
            opened_files.append((None, None, stdout, None))

        # Implement echo flags. We only support -e and -n, and not yet in
        # combination. We have to ignore unknown flags, because `echo "-D FOO"`
        # prints the dash.
        args = cmd.args[1:]
        interpret_escapes = False
        write_newline = True
        while len(args) >= 1 and args[0] in ('-e', '-n'):
            flag = args[0]
            args = args[1:]
            if flag == '-e':
                interpret_escapes = True
            elif flag == '-n':
                write_newline = False

        def maybeUnescape(arg):
            if not interpret_escapes:
                return arg

            arg = lit.util.to_bytes(arg)
            codec = 'string_escape' if sys.version_info < (3,0) else 'unicode_escape'
            return arg.decode(codec)

        if args:
            for arg in args[:-1]:
                stdout.write(encode(maybeUnescape(arg)))
                stdout.write(encode(' '))
            stdout.write(encode(maybeUnescape(args[-1])))
        if write_newline:
            stdout.write(encode('\n'))

        if not is_redirected:
            return stdout.getvalue()
        return ""
    finally:
        # Close every file opened for the redirects, including when the
        # redirect check or a write above raised.
        for (name, mode, f, path) in opened_files:
            f.close()
325
def executeBuiltinMkdir(cmd, cmd_shenv):
    """executeBuiltinMkdir - Create new directories.

    Only the -p option is accepted. Relative operands are resolved against
    ``cmd_shenv.cwd``. Returns a ShellCommandResult whose stderr collects the
    failures of plain (non -p) directory creation and whose exit code is 1 if
    any such failure happened. Raises InternalShellError on an unsupported
    option or when no operand is given.
    """
    operands = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
    try:
        opts, operands = getopt.gnu_getopt(operands, 'p')
    except getopt.GetoptError as err:
        raise InternalShellError(cmd, "Unsupported: 'mkdir':  %s" % str(err))

    make_parents = False
    for flag, _value in opts:
        if flag == "-p":
            make_parents = True
        else:
            assert False, "unhandled option"

    if not operands:
        raise InternalShellError(cmd, "Error: 'mkdir' is missing an operand")

    errors = StringIO()
    status = 0
    for target in operands:
        if not os.path.isabs(target):
            target = os.path.realpath(os.path.join(cmd_shenv.cwd, target))
        if make_parents:
            lit.util.mkdir_p(target)
            continue
        try:
            os.mkdir(target)
        except OSError as err:
            errors.write("Error: 'mkdir' command failed, %s\n" % str(err))
            status = 1
    return ShellCommandResult(cmd, "", errors.getvalue(), status, False)
358
def executeBuiltinDiff(cmd, cmd_shenv):
    """executeBuiltinDiff - Compare files line by line.

    Supported options are -w, -b, -u, -r and --strip-trailing-cr; exactly two
    operands are required. Relative operands are resolved against
    ``cmd_shenv.cwd``.

    Returns a ShellCommandResult carrying the diff text on stdout, error
    messages on stderr, and an exit code of 0 when no difference was found and
    1 otherwise (including I/O errors). Raises InternalShellError for an
    unsupported option or a wrong number of operands.
    """
    args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
    try:
        opts, args = getopt.gnu_getopt(args, "wbur", ["strip-trailing-cr"])
    except getopt.GetoptError as err:
        raise InternalShellError(cmd, "Unsupported: 'diff':  %s" % str(err))

    filepaths, dir_trees = [], []
    ignore_all_space = False
    ignore_space_change = False
    unified_diff = False
    recursive_diff = False
    strip_trailing_cr = False
    for o, a in opts:
        if o == "-w":
            ignore_all_space = True
        elif o == "-b":
            ignore_space_change = True
        elif o == "-u":
            unified_diff = True
        elif o == "-r":
            recursive_diff = True
        elif o == "--strip-trailing-cr":
            strip_trailing_cr = True
        else:
            assert False, "unhandled option"

    if len(args) != 2:
        raise InternalShellError(cmd, "Error:  missing or extra operand")

    def getDirTree(path, basedir=""):
        # Tree is a tuple of form (dirname, child_trees).
        # An empty dir has child_trees = [], a file has child_trees = None.
        child_trees = []
        for dirname, child_dirs, files in os.walk(os.path.join(basedir, path)):
            for child_dir in child_dirs:
                child_trees.append(getDirTree(child_dir, dirname))
            for filename in files:
                child_trees.append((filename, None))
            # Only the first step of the walk is used; sub-directories are
            # handled by the recursive calls above.
            return path, sorted(child_trees)
        # os.walk() produced nothing, so the path is not a directory that
        # could be listed: describe it as a file instead of returning None.
        return path, None

    def compareTwoFiles(filepaths):
        # Probe how each file can be read: with the default encoding, as
        # UTF-8, or (if neither works) as raw bytes.
        compare_bytes = False
        encoding = None
        for file in filepaths:
            try:
                with open(file, 'r') as f:
                    f.readlines()
            except UnicodeDecodeError:
                try:
                    with io.open(file, 'r', encoding="utf-8") as f:
                        f.readlines()
                    encoding = "utf-8"
                except Exception:
                    compare_bytes = True

        if compare_bytes:
            return compareTwoBinaryFiles(filepaths)
        else:
            return compareTwoTextFiles(filepaths, encoding)

    def compareTwoBinaryFiles(filepaths):
        filelines = []
        for file in filepaths:
            with open(file, 'rb') as f:
                filelines.append(f.readlines())

        exitCode = 0
        if hasattr(difflib, 'diff_bytes'):
            # python 3.5 or newer
            # NOTE: this branch always produces a unified diff.
            diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode())
            # The files are compared as bytes precisely because they could not
            # be decoded, so undecodable bytes are escaped rather than allowed
            # to raise UnicodeDecodeError.
            diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
        else:
            # python 2.7
            func = difflib.unified_diff if unified_diff else difflib.context_diff
            diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1])

        for diff in diffs:
            stdout.write(diff)
            exitCode = 1
        return exitCode

    def compareTwoTextFiles(filepaths, encoding):
        filelines = []
        for file in filepaths:
            if encoding is None:
                with open(file, 'r') as f:
                    filelines.append(f.readlines())
            else:
                with io.open(file, 'r', encoding=encoding) as f:
                    filelines.append(f.readlines())

        exitCode = 0
        def compose2(f, g):
            return lambda x: f(g(x))

        def stripTrailingCR(line):
            # Lines coming from readlines() keep their '\n', so the carriage
            # return to drop sits in front of it; a last line without a
            # newline may end in '\r' directly.
            if line.endswith('\r\n'):
                return line[:-2] + '\n'
            return line.rstrip('\r')

        f = lambda x: x
        if strip_trailing_cr:
            f = compose2(stripTrailingCR, f)
        if ignore_all_space or ignore_space_change:
            ignoreSpace = lambda line, separator: separator.join(line.split())
            ignoreAllSpaceOrSpaceChange = functools.partial(ignoreSpace, separator='' if ignore_all_space else ' ')
            f = compose2(ignoreAllSpaceOrSpaceChange, f)

        for idx, lines in enumerate(filelines):
            filelines[idx]= [f(line) for line in lines]

        func = difflib.unified_diff if unified_diff else difflib.context_diff
        for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1]):
            stdout.write(diff)
            exitCode = 1
        return exitCode

    def printDirVsFile(dir_path, file_path):
        if os.path.getsize(file_path):
            msg = "File %s is a directory while file %s is a regular file"
        else:
            msg = "File %s is a directory while file %s is a regular empty file"
        stdout.write(msg % (dir_path, file_path) + "\n")

    def printFileVsDir(file_path, dir_path):
        if os.path.getsize(file_path):
            msg = "File %s is a regular file while file %s is a directory"
        else:
            msg = "File %s is a regular empty file while file %s is a directory"
        stdout.write(msg % (file_path, dir_path) + "\n")

    def printOnlyIn(basedir, path, name):
        stdout.write("Only in %s: %s\n" % (os.path.join(basedir, path), name))

    def compareDirTrees(dir_trees, base_paths=("", "")):
        # Dirnames of the trees are not checked, it's caller's responsibility,
        # as top-level dirnames are always different. Base paths are important
        # for doing os.walk, but we don't put it into tree's dirname in order
        # to speed up string comparison below and while sorting in getDirTree.
        left_tree, right_tree = dir_trees[0], dir_trees[1]
        left_base, right_base = base_paths[0], base_paths[1]

        # Compare two files or report file vs. directory mismatch.
        if left_tree[1] is None and right_tree[1] is None:
            return compareTwoFiles([os.path.join(left_base, left_tree[0]),
                                    os.path.join(right_base, right_tree[0])])

        if left_tree[1] is None and right_tree[1] is not None:
            printFileVsDir(os.path.join(left_base, left_tree[0]),
                           os.path.join(right_base, right_tree[0]))
            return 1

        if left_tree[1] is not None and right_tree[1] is None:
            printDirVsFile(os.path.join(left_base, left_tree[0]),
                           os.path.join(right_base, right_tree[0]))
            return 1

        # Compare two directories via recursive use of compareDirTrees.
        exitCode = 0
        left_names = [node[0] for node in left_tree[1]]
        right_names = [node[0] for node in right_tree[1]]
        l, r = 0, 0
        while l < len(left_names) and r < len(right_names):
            # Names are sorted in getDirTree, rely on that order.
            if left_names[l] < right_names[r]:
                exitCode = 1
                printOnlyIn(left_base, left_tree[0], left_names[l])
                l += 1
            elif left_names[l] > right_names[r]:
                exitCode = 1
                printOnlyIn(right_base, right_tree[0], right_names[r])
                r += 1
            else:
                exitCode |= compareDirTrees([left_tree[1][l], right_tree[1][r]],
                                            [os.path.join(left_base, left_tree[0]),
                                            os.path.join(right_base, right_tree[0])])
                l += 1
                r += 1

        # At least one of the trees has ended. Report names from the other tree.
        while l < len(left_names):
            exitCode = 1
            printOnlyIn(left_base, left_tree[0], left_names[l])
            l += 1
        while r < len(right_names):
            exitCode = 1
            printOnlyIn(right_base, right_tree[0], right_names[r])
            r += 1
        return exitCode

    stderr = StringIO()
    stdout = StringIO()
    exitCode = 0
    try:
        for file in args:
            if not os.path.isabs(file):
                file = os.path.realpath(os.path.join(cmd_shenv.cwd, file))

            if recursive_diff:
                dir_trees.append(getDirTree(file))
            else:
                filepaths.append(file)

        if not recursive_diff:
            exitCode = compareTwoFiles(filepaths)
        else:
            exitCode = compareDirTrees(dir_trees)

    except IOError as err:
        stderr.write("Error: 'diff' command failed, %s\n" % str(err))
        exitCode = 1

    return ShellCommandResult(cmd, stdout.getvalue(), stderr.getvalue(), exitCode, False)
570
def executeBuiltinRm(cmd, cmd_shenv):
    """executeBuiltinRm - Removes (deletes) files or directories.

    Supported options are -f (force) and -r / -R / --recursive. Relative
    operands are resolved against ``cmd_shenv.cwd``.

    Returns a ShellCommandResult whose stderr collects the failures and whose
    exit code is 1 if any operand could not be removed. Raises
    InternalShellError on an unsupported option or when no operand is given.
    """
    args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
    try:
        # getopt expects long option names without the leading '--'.
        opts, args = getopt.gnu_getopt(args, "frR", ["recursive"])
    except getopt.GetoptError as err:
        raise InternalShellError(cmd, "Unsupported: 'rm':  %s" % str(err))

    force = False
    recursive = False
    for o, a in opts:
        if o == "-f":
            force = True
        elif o in ("-r", "-R", "--recursive"):
            recursive = True
        else:
            assert False, "unhandled option"

    if len(args) == 0:
        raise InternalShellError(cmd, "Error: 'rm' is missing an operand")

    def on_rm_error(func, path, exc_info):
        # path contains the path of the file that couldn't be removed
        # let's just assume that it's read-only and remove it.
        os.chmod(path, stat.S_IMODE( os.stat(path).st_mode) | stat.S_IWRITE)
        os.remove(path)

    stderr = StringIO()
    exitCode = 0
    for path in args:
        if not os.path.isabs(path):
            path = os.path.realpath(os.path.join(cmd_shenv.cwd, path))
        if force and not os.path.exists(path):
            continue
        try:
            if os.path.isdir(path):
                if not recursive:
                    stderr.write("Error: %s is a directory\n" % path)
                    exitCode = 1
                    # A directory must not be removed without -r; report the
                    # error and leave it in place.
                    continue
                shutil.rmtree(path, onerror = on_rm_error if force else None)
            else:
                if force and not os.access(path, os.W_OK):
                    os.chmod(path,
                             stat.S_IMODE(os.stat(path).st_mode) | stat.S_IWRITE)
                os.remove(path)
        except OSError as err:
            stderr.write("Error: 'rm' command failed, %s\n" % str(err))
            exitCode = 1
    return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False)
620
def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
    """Return the standard fds for cmd after applying redirects

    Returns the three standard file descriptors for the new child process.  Each
    fd may be an open, writable file object or a sentinel value from the
    subprocess module.

    Every file opened here is also appended to ``opened_files`` as a
    ``(filename, mode, file-object, resolved-path)`` tuple; closing them is
    left to the caller. Raises InternalShellError for unsupported redirects.
    """

    # Apply the redirections, we use (N,) as a sentinel to indicate stdin,
    # stdout, stderr for N equal to 0, 1, or 2 respectively. Redirects to or
    # from a file are represented with a list [file, mode, file-object]
    # where file-object is initially None.
    redirects = [(0,), (1,), (2,)]
    for (op, filename) in cmd.redirects:
        if op == ('>',2):
            redirects[2] = [filename, 'w', None]
        elif op == ('>>',2):
            redirects[2] = [filename, 'a', None]
        # Membership in a tuple (rather than the substring test "in '012'")
        # so that only the exact descriptors 0, 1 and 2 are accepted.
        elif op == ('>&',2) and filename in ('0', '1', '2'):
            redirects[2] = redirects[int(filename)]
        elif op == ('>&',) or op == ('&>',):
            redirects[1] = redirects[2] = [filename, 'w', None]
        elif op == ('>',):
            redirects[1] = [filename, 'w', None]
        elif op == ('>>',):
            redirects[1] = [filename, 'a', None]
        elif op == ('<',):
            redirects[0] = [filename, 'r', None]
        else:
            raise InternalShellError(cmd, "Unsupported redirect: %r" % ((op, filename),))

    # Open file descriptors in a second pass.
    std_fds = [None, None, None]
    for (index, r) in enumerate(redirects):
        # Handle the sentinel values for defaults up front.
        if isinstance(r, tuple):
            if r == (0,):
                fd = stdin_source
            elif r == (1,):
                if index == 0:
                    raise InternalShellError(cmd, "Unsupported redirect for stdin")
                elif index == 1:
                    fd = subprocess.PIPE
                else:
                    fd = subprocess.STDOUT
            elif r == (2,):
                if index != 2:
                    raise InternalShellError(cmd, "Unsupported redirect on stdout")
                fd = subprocess.PIPE
            else:
                raise InternalShellError(cmd, "Bad redirect")
            std_fds[index] = fd
            continue

        (filename, mode, fd) = r

        # Check if we already have an open fd. This can happen if stdout and
        # stderr go to the same place.
        if fd is not None:
            std_fds[index] = fd
            continue

        redir_filename = None
        name = expand_glob(filename, cmd_shenv.cwd)
        if len(name) != 1:
            raise InternalShellError(cmd, "Unsupported: glob in "
                                     "redirect expanded to multiple files")
        name = name[0]
        if kAvoidDevNull and name == kDevNull:
            fd = tempfile.TemporaryFile(mode=mode)
        elif kIsWindows and name == '/dev/tty':
            # Simulate /dev/tty on Windows.
            # "CON" is a special filename for the console.
            fd = open("CON", mode)
        else:
            # Make sure relative paths are relative to the cwd.
            redir_filename = os.path.join(cmd_shenv.cwd, name)
            fd = open(redir_filename, mode)
        # Workaround a Win32 and/or subprocess bug when appending.
        #
        # FIXME: Actually, this is probably an instance of PR6753.
        if mode == 'a':
            fd.seek(0, 2)
        # Mutate the underlying redirect list so that we can redirect stdout
        # and stderr to the same place without opening the file twice.
        r[2] = fd
        opened_files.append((filename, mode, fd) + (redir_filename,))
        std_fds[index] = fd

    return std_fds
711
712def _executeShCmd(cmd, shenv, results, timeoutHelper):
713    if timeoutHelper.timeoutReached():
714        # Prevent further recursion if the timeout has been hit
715        # as we should try avoid launching more processes.
716        return None
717
718    if isinstance(cmd, ShUtil.Seq):
719        if cmd.op == ';':
720            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
721            return _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
722
723        if cmd.op == '&':
724            raise InternalShellError(cmd,"unsupported shell operator: '&'")
725
726        if cmd.op == '||':
727            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
728            if res != 0:
729                res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
730            return res
731
732        if cmd.op == '&&':
733            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
734            if res is None:
735                return res
736
737            if res == 0:
738                res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
739            return res
740
741        raise ValueError('Unknown shell command: %r' % cmd.op)
742    assert isinstance(cmd, ShUtil.Pipeline)
743
744    # Handle shell builtins first.
745    if cmd.commands[0].args[0] == 'cd':
746        if len(cmd.commands) != 1:
747            raise ValueError("'cd' cannot be part of a pipeline")
748        if len(cmd.commands[0].args) != 2:
749            raise ValueError("'cd' supports only one argument")
750        newdir = cmd.commands[0].args[1]
751        # Update the cwd in the parent environment.
752        if os.path.isabs(newdir):
753            shenv.cwd = newdir
754        else:
755            shenv.cwd = os.path.realpath(os.path.join(shenv.cwd, newdir))
756        # The cd builtin always succeeds. If the directory does not exist, the
757        # following Popen calls will fail instead.
758        return 0
759
760    # Handle "echo" as a builtin if it is not part of a pipeline. This greatly
761    # speeds up tests that construct input files by repeatedly echo-appending to
762    # a file.
763    # FIXME: Standardize on the builtin echo implementation. We can use a
764    # temporary file to sidestep blocking pipe write issues.
765    if cmd.commands[0].args[0] == 'echo' and len(cmd.commands) == 1:
766        output = executeBuiltinEcho(cmd.commands[0], shenv)
767        results.append(ShellCommandResult(cmd.commands[0], output, "", 0,
768                                          False))
769        return 0
770
771    if cmd.commands[0].args[0] == 'export':
772        if len(cmd.commands) != 1:
773            raise ValueError("'export' cannot be part of a pipeline")
774        if len(cmd.commands[0].args) != 2:
775            raise ValueError("'export' supports only one argument")
776        updateEnv(shenv, cmd.commands[0])
777        return 0
778
779    if cmd.commands[0].args[0] == 'mkdir':
780        if len(cmd.commands) != 1:
781            raise InternalShellError(cmd.commands[0], "Unsupported: 'mkdir' "
782                                     "cannot be part of a pipeline")
783        cmdResult = executeBuiltinMkdir(cmd.commands[0], shenv)
784        results.append(cmdResult)
785        return cmdResult.exitCode
786
787    if cmd.commands[0].args[0] == 'diff':
788        if len(cmd.commands) != 1:
789            raise InternalShellError(cmd.commands[0], "Unsupported: 'diff' "
790                                     "cannot be part of a pipeline")
791        cmdResult = executeBuiltinDiff(cmd.commands[0], shenv)
792        results.append(cmdResult)
793        return cmdResult.exitCode
794
795    if cmd.commands[0].args[0] == 'rm':
796        if len(cmd.commands) != 1:
797            raise InternalShellError(cmd.commands[0], "Unsupported: 'rm' "
798                                     "cannot be part of a pipeline")
799        cmdResult = executeBuiltinRm(cmd.commands[0], shenv)
800        results.append(cmdResult)
801        return cmdResult.exitCode
802
803    if cmd.commands[0].args[0] == ':':
804        if len(cmd.commands) != 1:
805            raise InternalShellError(cmd.commands[0], "Unsupported: ':' "
806                                     "cannot be part of a pipeline")
807        results.append(ShellCommandResult(cmd.commands[0], '', '', 0, False))
808        return 0;
809
810    procs = []
811    default_stdin = subprocess.PIPE
812    stderrTempFiles = []
813    opened_files = []
814    named_temp_files = []
815    builtin_commands = set(['cat'])
816    builtin_commands_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "builtin_commands")
817    # To avoid deadlock, we use a single stderr stream for piped
818    # output. This is null until we have seen some output using
819    # stderr.
820    for i,j in enumerate(cmd.commands):
821        # Reference the global environment by default.
822        cmd_shenv = shenv
823        if j.args[0] == 'env':
824            # Create a copy of the global environment and modify it for this one
825            # command. There might be multiple envs in a pipeline:
826            #   env FOO=1 llc < %s | env BAR=2 llvm-mc | FileCheck %s
827            cmd_shenv = ShellEnvironment(shenv.cwd, shenv.env)
828            updateEnv(cmd_shenv, j)
829
830        stdin, stdout, stderr = processRedirects(j, default_stdin, cmd_shenv,
831                                                 opened_files)
832
833        # If stderr wants to come from stdout, but stdout isn't a pipe, then put
834        # stderr on a pipe and treat it as stdout.
835        if (stderr == subprocess.STDOUT and stdout != subprocess.PIPE):
836            stderr = subprocess.PIPE
837            stderrIsStdout = True
838        else:
839            stderrIsStdout = False
840
841            # Don't allow stderr on a PIPE except for the last
842            # process, this could deadlock.
843            #
844            # FIXME: This is slow, but so is deadlock.
845            if stderr == subprocess.PIPE and j != cmd.commands[-1]:
846                stderr = tempfile.TemporaryFile(mode='w+b')
847                stderrTempFiles.append((i, stderr))
848
849        # Resolve the executable path ourselves.
850        args = list(j.args)
851        executable = None
852        is_builtin_cmd = args[0] in builtin_commands;
853        if not is_builtin_cmd:
854            # For paths relative to cwd, use the cwd of the shell environment.
855            if args[0].startswith('.'):
856                exe_in_cwd = os.path.join(cmd_shenv.cwd, args[0])
857                if os.path.isfile(exe_in_cwd):
858                    executable = exe_in_cwd
859            if not executable:
860                executable = lit.util.which(args[0], cmd_shenv.env['PATH'])
861            if not executable:
862                raise InternalShellError(j, '%r: command not found' % j.args[0])
863
864        # Replace uses of /dev/null with temporary files.
865        if kAvoidDevNull:
866            # In Python 2.x, basestring is the base class for all string (including unicode)
867            # In Python 3.x, basestring no longer exist and str is always unicode
868            try:
869                str_type = basestring
870            except NameError:
871                str_type = str
872            for i,arg in enumerate(args):
873                if isinstance(arg, str_type) and kDevNull in arg:
874                    f = tempfile.NamedTemporaryFile(delete=False)
875                    f.close()
876                    named_temp_files.append(f.name)
877                    args[i] = arg.replace(kDevNull, f.name)
878
879        # Expand all glob expressions
880        args = expand_glob_expressions(args, cmd_shenv.cwd)
881        if is_builtin_cmd:
882            args.insert(0, sys.executable)
883            args[1] = os.path.join(builtin_commands_dir ,args[1] + ".py")
884
885        # On Windows, do our own command line quoting for better compatibility
886        # with some core utility distributions.
887        if kIsWindows:
888            args = quote_windows_command(args)
889
890        try:
891            procs.append(subprocess.Popen(args, cwd=cmd_shenv.cwd,
892                                          executable = executable,
893                                          stdin = stdin,
894                                          stdout = stdout,
895                                          stderr = stderr,
896                                          env = cmd_shenv.env,
897                                          close_fds = kUseCloseFDs))
898            # Let the helper know about this process
899            timeoutHelper.addProcess(procs[-1])
900        except OSError as e:
901            raise InternalShellError(j, 'Could not create process ({}) due to {}'.format(executable, e))
902
903        # Immediately close stdin for any process taking stdin from us.
904        if stdin == subprocess.PIPE:
905            procs[-1].stdin.close()
906            procs[-1].stdin = None
907
908        # Update the current stdin source.
909        if stdout == subprocess.PIPE:
910            default_stdin = procs[-1].stdout
911        elif stderrIsStdout:
912            default_stdin = procs[-1].stderr
913        else:
914            default_stdin = subprocess.PIPE
915
916    # Explicitly close any redirected files. We need to do this now because we
917    # need to release any handles we may have on the temporary files (important
918    # on Win32, for example). Since we have already spawned the subprocess, our
919    # handles have already been transferred so we do not need them anymore.
920    for (name, mode, f, path) in opened_files:
921        f.close()
922
923    # FIXME: There is probably still deadlock potential here. Yawn.
924    procData = [None] * len(procs)
925    procData[-1] = procs[-1].communicate()
926
927    for i in range(len(procs) - 1):
928        if procs[i].stdout is not None:
929            out = procs[i].stdout.read()
930        else:
931            out = ''
932        if procs[i].stderr is not None:
933            err = procs[i].stderr.read()
934        else:
935            err = ''
936        procData[i] = (out,err)
937
938    # Read stderr out of the temp files.
939    for i,f in stderrTempFiles:
940        f.seek(0, 0)
941        procData[i] = (procData[i][0], f.read())
942
943    exitCode = None
944    for i,(out,err) in enumerate(procData):
945        res = procs[i].wait()
946        # Detect Ctrl-C in subprocess.
947        if res == -signal.SIGINT:
948            raise KeyboardInterrupt
949
950        # Ensure the resulting output is always of string type.
951        try:
952            if out is None:
953                out = ''
954            else:
955                out = to_string(out.decode('utf-8', errors='replace'))
956        except:
957            out = str(out)
958        try:
959            if err is None:
960                err = ''
961            else:
962                err = to_string(err.decode('utf-8', errors='replace'))
963        except:
964            err = str(err)
965
966        # Gather the redirected output files for failed commands.
967        output_files = []
968        if res != 0:
969            for (name, mode, f, path) in sorted(opened_files):
970                if path is not None and mode in ('w', 'a'):
971                    try:
972                        with open(path, 'rb') as f:
973                            data = f.read()
974                    except:
975                        data = None
976                    if data is not None:
977                        output_files.append((name, path, data))
978
979        results.append(ShellCommandResult(
980            cmd.commands[i], out, err, res, timeoutHelper.timeoutReached(),
981            output_files))
982        if cmd.pipe_err:
983            # Take the last failing exit code from the pipeline.
984            if not exitCode or res != 0:
985                exitCode = res
986        else:
987            exitCode = res
988
989    # Remove any named temporary files we created.
990    for f in named_temp_files:
991        try:
992            os.remove(f)
993        except OSError:
994            pass
995
996    if cmd.negate:
997        exitCode = not exitCode
998
999    return exitCode
1000
def executeScriptInternal(test, litConfig, tmpBase, commands, cwd):
    """Run `commands` with lit's internal shell and build a per-command log.

    Every entry of `commands` is rewritten in place so that its %dbg(ARG)
    marker becomes the null command ": 'ARG'; ".  Each line is then parsed
    with ShUtil.ShParser and the parsed commands are chained with '&&' before
    being passed to executeShCmd.  `commands` must contain at least one line.
    `tmpBase` is accepted but not used by this function.

    Returns a lit.Test.Result with Test.FAIL when a line cannot be parsed.
    Otherwise returns (out, err, exitCode, timeoutInfo), where `out` is the
    formatted log and `err` is always the empty string.  An
    InternalShellError raised during execution is reported as exit code 127.
    """
    cmds = []
    for i, ln in enumerate(commands):
        ln = commands[i] = re.sub(kPdbgRegex, ": '\\1'; ", ln)
        try:
            cmds.append(ShUtil.ShParser(ln, litConfig.isWindows,
                                        test.config.pipefail).parse())
        except Exception:
            # Deliberately not a bare 'except': KeyboardInterrupt and
            # SystemExit must propagate rather than being reported as a
            # shell parser error.
            return lit.Test.Result(Test.FAIL, "shell parser error on: %r" % ln)

    # Chain the individual lines so that execution stops at the first failure.
    cmd = cmds[0]
    for c in cmds[1:]:
        cmd = ShUtil.Seq(cmd, '&&', c)

    results = []
    timeoutInfo = None
    try:
        shenv = ShellEnvironment(cwd, test.config.environment)
        exitCode, timeoutInfo = executeShCmd(cmd, shenv, results, timeout=litConfig.maxIndividualTestTime)
    except InternalShellError as e:
        exitCode = 127
        results.append(
            ShellCommandResult(e.command, '', e.message, exitCode, False))

    # Collect the log fragments and join them once at the end.
    log = []
    for result in results:
        # Write the command line run.
        log.append('$ %s\n' % (' '.join('"%s"' % s
                                        for s in result.command.args),))

        # If nothing interesting happened, move on.
        if litConfig.maxIndividualTestTime == 0 and \
               result.exitCode == 0 and \
               not result.stdout.strip() and not result.stderr.strip():
            continue

        # Otherwise, something failed or was printed, show it.

        # Add the command output, if redirected.
        for (name, path, data) in result.outputFiles:
            if data.strip():
                log.append("# redirected output from %r:\n" % (name,))
                data = to_string(data.decode('utf-8', errors='replace'))
                if len(data) > 1024:
                    log.append(data[:1024] + "\n...\n")
                    log.append("note: data was truncated\n")
                else:
                    log.append(data)
                log.append("\n")

        if result.stdout.strip():
            log.append('# command output:\n%s\n' % (result.stdout,))
        if result.stderr.strip():
            log.append('# command stderr:\n%s\n' % (result.stderr,))
        if not result.stdout.strip() and not result.stderr.strip():
            log.append("note: command had no output on stdout or stderr\n")

        # Show the error conditions:
        if result.exitCode != 0:
            # On Windows, a negative exit code indicates a signal, and those are
            # easier to recognize or look up if we print them in hex.
            if litConfig.isWindows and result.exitCode < 0:
                codeStr = hex(int(result.exitCode & 0xFFFFFFFF)).rstrip("L")
            else:
                codeStr = str(result.exitCode)
            log.append("error: command failed with exit status: %s\n" % (
                codeStr,))
        if litConfig.maxIndividualTestTime > 0:
            log.append('error: command reached timeout: %s\n' % (
                str(result.timeoutReached),))

    out = ''.join(log)
    err = ''
    return out, err, exitCode, timeoutInfo
1074
def executeScript(test, litConfig, tmpBase, commands, cwd):
    """Write `commands` to a script file and run it with an external shell.

    The script is written to tmpBase + '.script' (with an extra '.bat' suffix
    when cmd.exe is used, i.e. on Windows with no bash available).  Every
    entry of `commands` is rewritten in place so that its %dbg(ARG) marker
    becomes a no-op command in the target shell.  The script is then run via
    cmd, the configured bash, or /bin/sh, optionally prefixed with the
    valgrind arguments.

    Returns (out, err, exitCode, timeoutInfo); timeoutInfo is None unless the
    command timed out, in which case it is the timeout exception's message.
    """
    bashPath = litConfig.getBashPath()
    isWin32CMDEXE = (litConfig.isWindows and not bashPath)
    script = tmpBase + '.script'
    if isWin32CMDEXE:
        script += '.bat'

    # Build the complete script text first so that it can be written (and, if
    # needed, encoded) in one step.
    if isWin32CMDEXE:
        for i, ln in enumerate(commands):
            commands[i] = re.sub(kPdbgRegex, "echo '\\1' > nul && ", ln)
        if litConfig.echo_all_commands:
            text = '@echo on\n'
        else:
            text = '@echo off\n'
        text += '\n@if %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands)
    else:
        for i, ln in enumerate(commands):
            commands[i] = re.sub(kPdbgRegex, ": '\\1'; ", ln)
        text = ''
        if test.config.pipefail:
            text += 'set -o pipefail;'
        if litConfig.echo_all_commands:
            text += 'set -x;'
        text += '{ ' + '; } &&\n{ '.join(commands) + '; }'
    text += '\n'

    # Write script file
    mode = 'w'
    if litConfig.isWindows and not isWin32CMDEXE:
        mode += 'b'  # Avoid CRLFs when writing bash scripts.
        # A file opened in binary mode only accepts bytes on Python 3, so
        # the text must be encoded before it is written.
        text = to_bytes(text)
    with open(script, mode) as f:
        f.write(text)

    if isWin32CMDEXE:
        command = ['cmd','/c', script]
    else:
        if bashPath:
            command = [bashPath, script]
        else:
            command = ['/bin/sh', script]
        if litConfig.useValgrind:
            # FIXME: Running valgrind on sh is overkill. We probably could just
            # run on clang with no real loss.
            command = litConfig.valgrindArgs + command

    try:
        out, err, exitCode = lit.util.executeCommand(command, cwd=cwd,
                                       env=test.config.environment,
                                       timeout=litConfig.maxIndividualTestTime)
        return (out, err, exitCode, None)
    except lit.util.ExecuteCommandTimeoutException as e:
        return (e.out, e.err, e.exitCode, e.msg)
1125
def parseIntegratedTestScriptCommands(source_path, keywords):
    """
    parseIntegratedTestScriptCommands(source_path, keywords) -> commands

    Scan the file at `source_path` for lines containing any of `keywords` and
    lazily yield one (line_number, keyword, rest_of_line) tuple per match.
    """

    # The file is read as raw bytes and searched with a bytes regular
    # expression, so input files are not required to have a valid encoding as
    # a whole.  Only the matched keyword and the remainder of its line have to
    # be decodable as UTF-8; they are converted to regular strings before being
    # yielded so that callers never have to care about the Python version.
    newline = to_bytes('\n')
    alternatives = "|".join(re.escape(k) for k in keywords)
    keywords_re = re.compile(to_bytes("(%s)(.*)\n" % (alternatives,)))

    with open(source_path, 'rb') as f:
        data = f.read()

        # Guarantee a trailing newline so that the final line can match.
        if not data.endswith(newline):
            data = data + newline

        line_number = 1
        counted_up_to = 0
        for match in keywords_re.finditer(data):
            # Advance the line counter by the number of newlines between the
            # previous match and this one.
            start = match.start()
            line_number += data.count(newline, counted_up_to, start)
            counted_up_to = start

            # Binary mode leaves Windows '\r' characters in place, so they are
            # stripped from the end of the yielded line by hand.
            keyword, rest = match.groups()
            decoded_keyword = to_string(keyword.decode('utf-8'))
            decoded_rest = to_string(rest.decode('utf-8').rstrip('\r'))
            yield (line_number, decoded_keyword, decoded_rest)
1181
def getTempPaths(test):
    """Return (tmpDir, tmpBase) for `test`.

    tmpDir is the 'Output' directory next to the test's exec path and tmpBase
    is the exec path's base name placed inside that directory.  Both are
    derived from the exec path rather than from the test source path."""
    exec_dir, exec_name = os.path.split(test.getExecPath())
    output_dir = os.path.join(exec_dir, 'Output')
    return output_dir, os.path.join(output_dir, exec_name)
1190
def colonNormalizePath(path):
    """Return `path` without a leading slash or drive-letter colon.

    On Windows, backslashes become forward slashes and the colon after the
    first character is removed.  Elsewhere the path must start with '/' and
    that leading slash is dropped."""
    if not kIsWindows:
        assert path[0] == '/'
        return path[1:]
    forward_slashed = path.replace('\\', '/')
    return re.sub(r'^(.):', r'\1', forward_slashed)
1197
def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False):
    """Return the default list of (pattern, replacement) substitutions.

    The list is ordered and is meant to be applied front to back:

      1. '%%' is hidden behind the '#_MARKER_#' placeholder.
      2. The test configuration's own substitutions.
      3. '%s', '%S', '%p', '%{pathsep}', '%t', '%basename_t' and '%T'.
      4. The '%/X' variants, whose paths use forward slashes.
      5. The '%:X' variants, produced by colonNormalizePath.
      6. '#_MARKER_#' is turned back into a literal '%'.

    If `normalize_slashes` is true, backslashes in the source and temporary
    paths are replaced with forward slashes for all variants.
    """
    sourcepath = test.getSourcePath()
    sourcedir = os.path.dirname(sourcepath)

    # Normalize slashes, if requested.
    if normalize_slashes:
        sourcepath = sourcepath.replace('\\', '/')
        sourcedir = sourcedir.replace('\\', '/')
        tmpDir = tmpDir.replace('\\', '/')
        tmpBase = tmpBase.replace('\\', '/')

    # We use #_MARKER_# to hide %% while we do the other substitutions.
    substitutions = [('%%', '#_MARKER_#')]
    substitutions.extend(test.config.substitutions)
    tmpName = tmpBase + '.tmp'
    baseName = os.path.basename(tmpBase)
    substitutions.extend([('%s', sourcepath),
                          ('%S', sourcedir),
                          ('%p', sourcedir),
                          ('%{pathsep}', os.pathsep),
                          ('%t', tmpName),
                          ('%basename_t', baseName),
                          ('%T', tmpDir)])

    # "%/[STpst]" should be normalized.
    substitutions.extend([
            ('%/s', sourcepath.replace('\\', '/')),
            ('%/S', sourcedir.replace('\\', '/')),
            ('%/p', sourcedir.replace('\\', '/')),
            ('%/t', tmpBase.replace('\\', '/') + '.tmp'),
            ('%/T', tmpDir.replace('\\', '/')),
            ])

    # "%:[STpst]" are normalized paths without colons and without a leading
    # slash.
    substitutions.extend([
            ('%:s', colonNormalizePath(sourcepath)),
            ('%:S', colonNormalizePath(sourcedir)),
            ('%:p', colonNormalizePath(sourcedir)),
            ('%:t', colonNormalizePath(tmpBase + '.tmp')),
            ('%:T', colonNormalizePath(tmpDir)),
            ])

    # Restore the hidden '%%' only after every '%...' pattern above has been
    # applied.  Restoring it earlier would let an escaped sequence such as
    # '%%/s' or '%%:t' turn into '%/s' or '%:t' and then be expanded.
    substitutions.append(('#_MARKER_#', '%'))
    return substitutions
1243
def applySubstitutions(script, substitutions):
    """Return a new list with every substitution applied to each script line.

    `substitutions` is a sequence of (pattern, replacement) pairs applied in
    order with re.sub, so patterns may use full regular expression syntax.
    Each resulting line has its surrounding whitespace stripped."""
    def expand(text):
        for pattern, replacement in substitutions:
            if kIsWindows:
                # Double the backslashes so that re.sub does not treat them
                # as escape sequences in the replacement.
                replacement = replacement.replace("\\","\\\\")
            text = re.sub(pattern, replacement, text)

        # Drop the trailing newline together with any other padding.
        return text.strip()

    # Build a real list (not a lazy iterator) on every Python version.
    return [expand(ln) for ln in script]
1260
1261
class ParserKind(object):
    """
    Enumerates the styles an integrated test keyword or command can have.

    TAG: A keyword that takes no value. Ex 'END.'
    COMMAND: A keyword followed by a list of shell commands. Ex 'RUN:'
    LIST: A keyword followed by a comma-separated list of values.
    BOOLEAN_EXPR: A keyword followed by a comma-separated list of
        boolean expressions. Ex 'XFAIL:'
    CUSTOM: A keyword whose parsing semantics are user supplied.
    """
    TAG = 0
    COMMAND = 1
    LIST = 2
    BOOLEAN_EXPR = 3
    CUSTOM = 4

    @staticmethod
    def allowedKeywordSuffixes(value):
        # The table is rebuilt on each call, so every caller receives its own
        # list object.
        colon_only = (ParserKind.COMMAND, ParserKind.LIST,
                      ParserKind.BOOLEAN_EXPR)
        suffixes_by_kind = dict((kind, [':']) for kind in colon_only)
        suffixes_by_kind[ParserKind.TAG] = ['.']
        suffixes_by_kind[ParserKind.CUSTOM] = [':', '.']
        return suffixes_by_kind[value]

    @staticmethod
    def str(value):
        # Each kind's display name is identical to its attribute name.
        names = ('TAG', 'COMMAND', 'LIST', 'BOOLEAN_EXPR', 'CUSTOM')
        name_by_kind = dict((getattr(ParserKind, name), name)
                            for name in names)
        return name_by_kind[value]
1297
1298
class IntegratedTestKeywordParser(object):
    """A parser for LLVM/Clang style integrated test scripts.

    keyword: The keyword to parse for. It must end in either '.' or ':'.
    kind: A value of ParserKind.
    parser: A custom parser. This value may only be specified with
            ParserKind.CUSTOM.
    initial_value: The value handed to the parser as `output` for the first
            parsed line.  Parsers that build lists append to it in place.

    A parser is a callable taking (line_number, line, output) and returning
    the new accumulated value, which is then available through getValue().
    """
    def __init__(self, keyword, kind, parser=None, initial_value=None):
        allowedSuffixes = ParserKind.allowedKeywordSuffixes(kind)
        if len(keyword) == 0 or keyword[-1] not in allowedSuffixes:
            if len(allowedSuffixes) == 1:
                raise ValueError("Keyword '%s' of kind '%s' must end in '%s'"
                                 % (keyword, ParserKind.str(kind),
                                    allowedSuffixes[0]))
            else:
                raise ValueError("Keyword '%s' of kind '%s' must end in "
                                 " one of '%s'"
                                 % (keyword, ParserKind.str(kind),
                                    ' '.join(allowedSuffixes)))

        if parser is not None and kind != ParserKind.CUSTOM:
            raise ValueError("custom parsers can only be specified with "
                             "ParserKind.CUSTOM")
        self.keyword = keyword
        self.kind = kind
        # Every (line_number, line) pair passed to parseLine, in order.
        self.parsed_lines = []
        self.value = initial_value
        self.parser = parser

        if kind == ParserKind.COMMAND:
            # COMMAND lines additionally need the keyword for the %dbg marker.
            self.parser = lambda line_number, line, output: \
                                 self._handleCommand(line_number, line, output,
                                                     self.keyword)
        elif kind == ParserKind.LIST:
            self.parser = self._handleList
        elif kind == ParserKind.BOOLEAN_EXPR:
            self.parser = self._handleBooleanExpr
        elif kind == ParserKind.TAG:
            self.parser = self._handleTag
        elif kind == ParserKind.CUSTOM:
            if parser is None:
                raise ValueError("ParserKind.CUSTOM requires a custom parser")
            self.parser = parser
        else:
            raise ValueError("Unknown kind '%s'" % kind)

    def parseLine(self, line_number, line):
        """Record `line` and feed it to the parser, updating the value.

        A ValueError raised by the parser is re-raised with the keyword and
        the test line number appended to its message.
        """
        try:
            self.parsed_lines += [(line_number, line)]
            self.value = self.parser(line_number, line, self.value)
        except ValueError as e:
            raise ValueError(str(e) + ("\nin %s directive on test line %d" %
                                       (self.keyword, line_number)))

    def getValue(self):
        """Return the value accumulated so far."""
        return self.value

    @staticmethod
    def _handleTag(line_number, line, output):
        """A helper for parsing TAG type keywords"""
        # True when nothing but whitespace follows the tag; otherwise the
        # previous value is kept.
        return (not line.strip() or output)

    @staticmethod
    def _handleCommand(line_number, line, output, keyword):
        """A helper for parsing COMMAND type keywords"""
        # Trim trailing whitespace.
        line = line.rstrip()
        # Substitute line number expressions
        line = re.sub(r'%\(line\)', str(line_number), line)

        def replace_line_number(match):
            # The pattern only admits '+' or '-' as the operator.
            offset = int(match.group(2))
            if match.group(1) == '+':
                return str(line_number + offset)
            return str(line_number - offset)
        line = re.sub(r'%\(line *([\+-]) *(\d+)\)', replace_line_number, line)
        # Collapse lines with trailing '\\'.
        if output and output[-1].endswith('\\'):
            output[-1] = output[-1][:-1] + line
        else:
            if output is None:
                output = []
            # Prefix each new command with a %dbg marker recording where the
            # command came from.
            pdbg = "%dbg({keyword} at line {line_number})".format(
                keyword=keyword,
                line_number=line_number)
            assert re.match(kPdbgRegex + "$", pdbg), \
                   "kPdbgRegex expected to match actual %dbg usage"
            line = "{pdbg} {real_command}".format(
                pdbg=pdbg,
                real_command=line)
            output.append(line)
        return output

    @staticmethod
    def _handleList(line_number, line, output):
        """A parser for LIST type keywords"""
        if output is None:
            output = []
        output.extend([s.strip() for s in line.split(',')])
        return output

    @staticmethod
    def _handleBooleanExpr(line_number, line, output):
        """A parser for BOOLEAN_EXPR type keywords"""
        if output is None:
            output = []
        output.extend([s.strip() for s in line.split(',')])
        # Evaluate each expression to verify syntax.
        # We don't want any results, just the raised ValueError.
        for s in output:
            if s != '*':
                BooleanExpression.evaluate(s, [])
        return output

    @staticmethod
    def _handleRequiresAny(line_number, line, output):
        """A custom parser to transform REQUIRES-ANY: into REQUIRES:"""

        # Extract the conditions specified in REQUIRES-ANY: as written.
        conditions = []
        IntegratedTestKeywordParser._handleList(line_number, line, conditions)

        # Output a `REQUIRES: a || b || c` expression in its place.
        expression = ' || '.join(conditions)
        # Return the helper's result: when `output` is None the helper
        # creates a new list, which would otherwise be lost.
        return IntegratedTestKeywordParser._handleBooleanExpr(line_number,
                                                              expression,
                                                              output)
1427
def parseIntegratedTestScript(test, additional_parsers=[],
                              require_script=True):
    """parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
    script, collecting its 'RUN' lines and recording its 'XFAIL', 'REQUIRES',
    'REQUIRES-ANY' and 'UNSUPPORTED' information on the test.

    Any additional parsers are installed next to the built-in ones; the test
    is scanned for their keywords as well and every match is passed to them.

    Returns the list of script lines, or a lit.Test.Result when the test is
    unresolved or unsupported.  With 'require_script' set to False an empty
    script may be returned, which suits test formats where the actual script
    is optional or ignored.
    """

    # Register the built-in keyword parsers.  The RUN: parser appends straight
    # into `script`.
    script = []
    make_parser = IntegratedTestKeywordParser
    keyword_parsers = {}
    for builtin in (
            make_parser('RUN:', ParserKind.COMMAND, initial_value=script),
            make_parser('XFAIL:', ParserKind.BOOLEAN_EXPR,
                        initial_value=test.xfails),
            make_parser('REQUIRES:', ParserKind.BOOLEAN_EXPR,
                        initial_value=test.requires),
            make_parser('REQUIRES-ANY:', ParserKind.CUSTOM,
                        IntegratedTestKeywordParser._handleRequiresAny,
                        initial_value=test.requires),
            make_parser('UNSUPPORTED:', ParserKind.BOOLEAN_EXPR,
                        initial_value=test.unsupported),
            make_parser('END.', ParserKind.TAG)):
        keyword_parsers[builtin.keyword] = builtin

    # Register the user-supplied parsers, rejecting duplicates.
    for extra in additional_parsers:
        if not isinstance(extra, IntegratedTestKeywordParser):
            raise ValueError('additional parser must be an instance of '
                             'IntegratedTestKeywordParser')
        if extra.keyword in keyword_parsers:
            raise ValueError("Parser for keyword '%s' already exists"
                             % extra.keyword)
        keyword_parsers[extra.keyword] = extra

    # Feed every keyword line found in the test source to its parser.
    sourcepath = test.getSourcePath()
    found = parseIntegratedTestScriptCommands(sourcepath,
                                              keyword_parsers.keys())
    for line_number, command_type, ln in found:
        handler = keyword_parsers[command_type]
        handler.parseLine(line_number, ln)
        # Stop scanning once the END. tag has produced True.
        if command_type == 'END.' and handler.getValue() is True:
            break

    # A script is mandatory unless the caller said otherwise.
    if require_script and not script:
        return lit.Test.Result(Test.UNRESOLVED, "Test has no run line!")

    # A trailing continuation on the last run line is an error.
    if script and script[-1][-1] == '\\':
        return lit.Test.Result(Test.UNRESOLVED,
                               "Test has unterminated run lines (with '\\')")

    # Enforce REQUIRES:
    missing_required_features = test.getMissingRequiredFeatures()
    if missing_required_features:
        return lit.Test.Result(Test.UNSUPPORTED,
                               "Test requires the following unavailable "
                               "features: %s"
                               % ', '.join(missing_required_features))

    # Enforce UNSUPPORTED:
    unsupported_features = test.getUnsupportedFeatures()
    if unsupported_features:
        return lit.Test.Result(
            Test.UNSUPPORTED,
            "Test does not support the following features "
            "and/or targets: %s" % ', '.join(unsupported_features))

    # Enforce limit_to_features.
    if not test.isWithinFeatureLimits():
        return lit.Test.Result(Test.UNSUPPORTED,
                               "Test does not require any of the features "
                               "specified in limit_to_features: %s"
                               % ', '.join(test.config.limit_to_features))

    return script
1514
1515
def _runShTest(test, litConfig, useExternalSh, script, tmpBase):
    """Execute `script` once and return the resulting lit.Test.Result.

    The script runs through executeScript when `useExternalSh` is true and
    through executeScriptInternal otherwise.  If the executor itself returns
    a lit.Test.Result, it is passed through unchanged.
    """
    # Make sure the output directory exists.
    lit.util.mkdir_p(os.path.dirname(tmpBase))

    execdir = os.path.dirname(test.getExecPath())
    run = executeScript if useExternalSh else executeScriptInternal
    res = run(test, litConfig, tmpBase, script, execdir)
    if isinstance(res, lit.Test.Result):
        return res

    out, err, exitCode, timeoutInfo = res
    if exitCode == 0:
        status = Test.PASS
    elif timeoutInfo is None:
        status = Test.FAIL
    else:
        status = Test.TIMEOUT

    # Assemble the output log section by section.
    sections = ["Script:\n--\n%s\n--\nExit Code: %d\n" % (
        '\n'.join(script), exitCode)]
    if timeoutInfo is not None:
        sections.append("Timeout: %s\n" % (timeoutInfo,))
    sections.append("\n")

    # Include the captured streams only when they are non-empty.
    if out:
        sections.append("Command Output (stdout):\n--\n%s\n--\n" % (out,))
    if err:
        sections.append("Command Output (stderr):\n--\n%s\n--\n" % (err,))

    return lit.Test.Result(status, ''.join(sections))
1552
1553
def executeShTest(test, litConfig, useExternalSh,
                  extra_substitutions=[]):
    """Parse, substitute and run a shell test, returning a lit.Test.Result.

    `extra_substitutions` are applied before the default substitutions.  A
    failing test is re-run up to test.config.test_retry_attempts more times
    when that attribute exists; a pass after a retry is reported as
    Test.FLAKYPASS.
    """
    if test.config.unsupported:
        return lit.Test.Result(Test.UNSUPPORTED, 'Test is unsupported')

    script = parseIntegratedTestScript(test)
    if isinstance(script, lit.Test.Result):
        return script
    if litConfig.noExecute:
        return lit.Test.Result(Test.PASS)

    tmpDir, tmpBase = getTempPaths(test)
    substitutions = list(extra_substitutions)
    substitutions.extend(
        getDefaultSubstitutions(test, tmpDir, tmpBase,
                                normalize_slashes=useExternalSh))
    script = applySubstitutions(script, substitutions)

    # One regular run plus the configured number of retries.
    attempts = 1
    if hasattr(test.config, 'test_retry_attempts'):
        attempts += test.config.test_retry_attempts
    for attempt in range(attempts):
        res = _runShTest(test, litConfig, useExternalSh, script, tmpBase)
        # Only plain failures are retried.
        if res.code != Test.FAIL:
            break

    # A pass that needed more than one run is a flaky pass; those are printed
    # separately in the test summary.
    was_retried = attempt > 0
    if was_retried and res.code == Test.PASS:
        res.code = Test.FLAKYPASS
    return res
1584