jerryscript/tools/gen-doctest.py

#!/usr/bin/env python

# Copyright JS Foundation and other contributors, http://js.foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import argparse
import fileinput
import os
import re
import shlex
import sys


class DoctestExtractor(object):
    """
    An extractor to process Markdown files and find doctests inside.
    """

    def __init__(self, outdir, dry):
        """
        :param outdir: path to the directory where to write the found doctests.
        :param dry: if True, don't create the doctest files but print the file
            names only.
        """
        self._outdir = outdir
        self._dry = dry

        # Attributes actually initialized by process()
        self._infile = None
        self._outname_base = None
        self._outname_cnt = None

    def _warning(self, message, lineno):
        """
        Print a warning to the standard error.

        :param message: a description of the problem.
        :param lineno: the location that triggered the warning.
        """
        print('%s:%d: %s' % (self._infile, lineno, message), file=sys.stderr)

    def _process_decl(self, params):
        """
        Process a doctest declaration (`[doctest]: # (name="test.c", ...)`).

        :param params: the parameter string of the declaration (the string
            between the parentheses).
        :return: a tuple of a dictionary (of keys and values taken from the
            `params` string) and the line number of the declaration.
        """
        tokens = list(shlex.shlex(params))

        decl = {}
        for i in range(0, len(tokens), 4):
            if i + 2 >= len(tokens) or tokens[i + 1] != '=' or (i + 3 < len(tokens) and tokens[i + 3] != ','):
                self._warning('incorrect parameter list for test (key="value", ...)', fileinput.filelineno())
                decl = {}
                break
            decl[tokens[i]] = tokens[i + 2].strip('\'"')

        if 'name' not in decl:
            decl['name'] = '%s%d.c' % (self._outname_base, self._outname_cnt)
            self._outname_cnt += 1

        if 'test' not in decl:
            decl['test'] = 'run'

        return decl, fileinput.filelineno()

    def _process_code_start(self):
        """
        Process the beginning of a fenced code block (` ```c `).

        :return: a tuple of a list (of the first line(s) of the doctest) and the
            line number of the start of the code block.
        """
        return ['#line %d "%s"\n' % (fileinput.filelineno() + 1, self._infile)], fileinput.filelineno()

    def _process_code_end(self, decl, code):
        """
        Process the end of a fenced code block (` ``` `).

        :param decl: the dictionary of the declaration parameters.
        :param code: the list of lines of the doctest.
        """
        outname = os.path.join(self._outdir, decl['name']).replace('\\', '/')
        action = decl['test']
        if self._dry:
            print('%s %s' % (action, outname))
        else:
            with open(outname, 'w') as outfile:
                outfile.writelines(code)

    def process(self, infile):
        """
        Find doctests in a Markdown file and process them according to the
        constructor parameters.

        :param infile: path to the input file.
        """
        self._infile = infile
        self._outname_base = os.path.splitext(os.path.basename(infile))[0]
        self._outname_cnt = 1

        mode = 'TEXT'
        decl, decl_lineno = {}, 0
        code, code_lineno = [], 0

        for line in fileinput.input(infile):
            decl_match = re.match(r'^\[doctest\]:\s+#\s+\((.*)\)\s*$', line)
            nl_match = re.match(r'^\s*$', line)
            start_match = re.match(r'^```c\s*$', line)
            end_match = re.match(r'^```\s*', line)

            if mode == 'TEXT':
                if decl_match is not None:
                    decl, decl_lineno = self._process_decl(decl_match.group(1))
                    mode = 'NL'
            elif mode == 'NL':
                if decl_match is not None:
                    self._warning('test without code block', decl_lineno)
                    decl, decl_lineno = self._process_decl(decl_match.group(1))
                elif start_match is not None:
                    code, code_lineno = self._process_code_start()
                    mode = 'CODE'
                elif nl_match is None:
                    self._warning('test without code block', decl_lineno)
                    mode = 'TEXT'
            elif mode == 'CODE':
                if end_match is not None:
                    self._process_code_end(decl, code)
                    mode = 'TEXT'
                else:
                    code.append(line)

        if mode == 'NL':
            self._warning('test without code block', decl_lineno)
        elif mode == 'CODE':
            self._warning('unterminated code block', code_lineno)


def main():
    parser = argparse.ArgumentParser(description='Markdown doctest extractor', epilog="""
        The tool extracts specially marked fenced C code blocks from the input Markdown files
        and writes them to the file system. The annotations recognized by the tool are special
        but valid Markdown links/comments that must be added before the fenced code blocks:
        `[doctest]: # (name="test.c", ...)`. For now, two parameters are valid:
        `name` determines the filename for the extracted code block (overriding the default
        auto-numbered naming scheme), and `test` determines the test action to be performed on
        the extracted code (valid options are "compile", "link", and the default "run").
        """)
    parser.add_argument('-d', '--dir', metavar='NAME', default=os.getcwd(),
                        help='output directory name (default: %(default)s)')
    parser.add_argument('--dry', action='store_true',
                        help='don\'t generate files but print file names that would be generated '
                             'and what test action to perform on them')
    parser.add_argument('file', nargs='+',
                        help='input Markdown file(s)')
    args = parser.parse_args()

    extractor = DoctestExtractor(args.dir, args.dry)
    for mdfile in args.file:
        extractor.process(mdfile)


if __name__ == '__main__':
    main()