1#!/usr/bin/env python 2 3# Copyright JS Foundation and other contributors, http://js.foundation 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17from __future__ import print_function 18 19import argparse 20import fileinput 21import os 22import re 23import shlex 24import sys 25 26 27class DoctestExtractor(object): 28 """ 29 An extractor to process Markdown files and find doctests inside. 30 """ 31 32 def __init__(self, outdir, dry): 33 """ 34 :param outdir: path to the directory where to write the found doctests. 35 :param dry: if True, don't create the doctest files but print the file 36 names only. 37 """ 38 self._outdir = outdir 39 self._dry = dry 40 41 # Attributes actually initialized by process() 42 self._infile = None 43 self._outname_base = None 44 self._outname_cnt = None 45 46 def _warning(self, message, lineno): 47 """ 48 Print a warning to the standard error. 49 50 :param message: a description of the problem. 51 :param lineno: the location that triggered the warning. 52 """ 53 print('%s:%d: %s' % (self._infile, lineno, message), file=sys.stderr) 54 55 def _process_decl(self, params): 56 """ 57 Process a doctest declaration (`[doctest]: # (name="test.c", ...)`). 58 59 :param params: the parameter string of the declaration (the string 60 between the parentheses). 61 :return: a tuple of a dictionary (of keys and values taken from the 62 `params` string) and the line number of the declaration. 63 """ 64 tokens = list(shlex.shlex(params)) 65 66 decl = {} 67 for i in range(0, len(tokens), 4): 68 if i + 2 >= len(tokens) or tokens[i + 1] != '=' or (i + 3 < len(tokens) and tokens[i + 3] != ','): 69 self._warning('incorrect parameter list for test (key="value", ...)', fileinput.filelineno()) 70 decl = {} 71 break 72 decl[tokens[i]] = tokens[i + 2].strip('\'"') 73 74 if 'name' not in decl: 75 decl['name'] = '%s%d.c' % (self._outname_base, self._outname_cnt) 76 self._outname_cnt += 1 77 78 if 'test' not in decl: 79 decl['test'] = 'run' 80 81 return decl, fileinput.filelineno() 82 83 def _process_code_start(self): 84 """ 85 Process the beginning of a fenced code block (` ```c `). 86 87 :return: a tuple of a list (of the first line(s) of the doctest) and the 88 line number of the start of the code block. 89 """ 90 return ['#line %d "%s"\n' % (fileinput.filelineno() + 1, self._infile)], fileinput.filelineno() 91 92 def _process_code_end(self, decl, code): 93 """ 94 Process the end of a fenced code block (` ``` `). 95 96 :param decl: the dictionary of the declaration parameters. 97 :param code: the list of lines of the doctest. 98 """ 99 outname = os.path.join(self._outdir, decl['name']).replace('\\', '/') 100 action = decl['test'] 101 if self._dry: 102 print('%s %s' % (action, outname)) 103 else: 104 with open(outname, 'w') as outfile: 105 outfile.writelines(code) 106 107 def process(self, infile): 108 """ 109 Find doctests in a Markdown file and process them according to the 110 constructor parameters. 111 112 :param infile: path to the input file. 113 """ 114 self._infile = infile 115 self._outname_base = os.path.splitext(os.path.basename(infile))[0] 116 self._outname_cnt = 1 117 118 mode = 'TEXT' 119 decl, decl_lineno = {}, 0 120 code, code_lineno = [], 0 121 122 for line in fileinput.input(infile): 123 decl_match = re.match(r'^\[doctest\]:\s+#\s+\((.*)\)\s*$', line) 124 nl_match = re.match(r'^\s*$', line) 125 start_match = re.match(r'^```c\s*$', line) 126 end_match = re.match(r'^```\s*', line) 127 128 if mode == 'TEXT': 129 if decl_match is not None: 130 decl, decl_lineno = self._process_decl(decl_match.group(1)) 131 mode = 'NL' 132 elif mode == 'NL': 133 if decl_match is not None: 134 self._warning('test without code block', decl_lineno) 135 decl, decl_lineno = self._process_decl(decl_match.group(1)) 136 elif start_match is not None: 137 code, code_lineno = self._process_code_start() 138 mode = 'CODE' 139 elif nl_match is None: 140 self._warning('test without code block', decl_lineno) 141 mode = 'TEXT' 142 elif mode == 'CODE': 143 if end_match is not None: 144 self._process_code_end(decl, code) 145 mode = 'TEXT' 146 else: 147 code.append(line) 148 149 if mode == 'NL': 150 self._warning('test without code block', decl_lineno) 151 elif mode == 'CODE': 152 self._warning('unterminated code block', code_lineno) 153 154 155def main(): 156 parser = argparse.ArgumentParser(description='Markdown doctest extractor', epilog=""" 157 The tool extracts specially marked fenced C code blocks from the input Markdown files 158 and writes them to the file system. The annotations recognized by the tool are special 159 but valid Markdown links/comments that must be added before the fenced code blocks: 160 `[doctest]: # (name="test.c", ...)`. For now, two parameters are valid: 161 `name` determines the filename for the extracted code block (overriding the default 162 auto-numbered naming scheme), and `test` determines the test action to be performed on 163 the extracted code (valid options are "compile", "link", and the default "run"). 164 """) 165 parser.add_argument('-d', '--dir', metavar='NAME', default=os.getcwd(), 166 help='output directory name (default: %(default)s)') 167 parser.add_argument('--dry', action='store_true', 168 help='don\'t generate files but print file names that would be generated ' 169 'and what test action to perform on them') 170 parser.add_argument('file', nargs='+', 171 help='input Markdown file(s)') 172 args = parser.parse_args() 173 174 extractor = DoctestExtractor(args.dir, args.dry) 175 for mdfile in args.file: 176 extractor.process(mdfile) 177 178 179if __name__ == '__main__': 180 main() 181