#
#  sources.py
#
#    Convert source code comments to multi-line blocks (library file).
#
#  Copyright 2002-2018 by
#  David Turner.
#
#  This file is part of the FreeType project, and may only be used,
#  modified, and distributed under the terms of the FreeType project
#  license, LICENSE.TXT.  By continuing to use, modify, or distribute
#  this file you indicate that you have read the license and
#  understand and accept it fully.

#
# This library file contains definitions of classes needed to decompose C
# source code files into a series of multi-line `blocks'.  There are two
# kinds of blocks.
#
#   - Normal blocks, which contain source code or ordinary comments.
#
#   - Documentation blocks, which have restricted formatting, and whose text
#     always starts with a documentation markup tag like `<Function>',
#     `<Type>', etc.
#
# The routines to process the content of documentation blocks are contained
# in file `content.py'; the classes and methods found here only deal with
# text parsing and basic documentation block extraction.
#


import fileinput
import re
import string


################################################################
##
##  SOURCE BLOCK FORMAT CLASS
##
##  A simple class containing compiled regular expressions to detect
##  potential documentation format block comments within C source code.
##
##  The `column' pattern must contain a group to `unbox' the content of
##  documentation comment blocks.
##
##  Later on, paragraphs are converted to long lines, which simplifies the
##  regular expressions that act upon the text.
##
class SourceBlockFormat:
    """A triple of compiled patterns describing one comment block layout.

    Attributes:
      id      -- the identifier passed to the constructor, stored as is.
      start   -- compiled pattern for the first line of a block.
      column  -- compiled pattern for an inner line of a block; group 1
                 is expected to hold the `unboxed' text of that line.
      end     -- compiled pattern for the last line of a block.
    """

    def __init__( self, id, start, column, end ):
        """Create a block pattern, used to recognize special documentation
           blocks.

           `start', `column', and `end' are regular expression source
           strings; each one is compiled with `re.VERBOSE', so whitespace
           and `#' comments inside them are ignored."""
        self.id     = id
        self.start  = re.compile( start, re.VERBOSE )
        self.column = re.compile( column, re.VERBOSE )
        self.end    = re.compile( end, re.VERBOSE )


#
# Format 1 documentation comment blocks.
#
#    /************************************/ (at least 2 asterisks)
#    /*                                  */
#    /*                                  */
#    /*                                  */
#    /************************************/ (at least 2 asterisks)
#
start = r'''
  \s*      # any number of whitespace
  /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  \s*$     # probably followed by whitespace
'''

column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # probably followed by whitespace
'''

# the closing line of a format 1 block looks exactly like its opening line
re_source_block_format1 = SourceBlockFormat( 1, start, column, start )


#
# Format 2 documentation comment blocks.
#
#    /************************************ (at least 2 asterisks)
#     *
#     *                                    (1 asterisk)
#     *
#     */                                   (1 or more asterisks)
#
start = r'''
  \s*     # any number of whitespace
  /\*{2,} # followed by '/' and at least two asterisks
  \s*$    # probably followed by whitespace
'''

column = r'''
  \s*           # any number of whitespace
  \*{1}(?![*/]) # followed by precisely one asterisk not followed by
                # another asterisk or a slash
  (.*)          # then anything (group 1)
'''

end = r'''
  \s*  # any number of whitespace
  \*+/ # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat( 2, start, column, end )


#
# The list of supported documentation block formats.  We could add new ones
# quite easily.
#
re_source_block_formats = [re_source_block_format1, re_source_block_format2]


#
# The following regular expressions correspond to markup tags within the
# documentation comment blocks.  They are equivalent despite their different
# syntax.
#
# A markup tag consists of letters or character `-', to be found in group 1.
#
# Notice that a markup tag _must_ begin a new paragraph.
#
re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' )  # <xxxx> format
re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' )  # @xxxx: format

#
# The list of supported markup tags.  We could add new ones quite easily.
#
re_markup_tags = [re_markup_tag1, re_markup_tag2]


#
# A regular expression to detect a cross reference, after markup tags have
# been stripped off.
#
# Two syntax forms are supported:
#
#   @<name>
#   @<name>[<id>]
#
# where both `<name>' and `<id>' consist of alphanumeric characters, `_',
# and `-'.  Use `<id>' if there are multiple, valid `<name>' entries.
#
# Example: @foo[bar]
#
re_crossref = re.compile( r"""
                            @
                            (?P<name>(?:\w|-)+
                                     (?:\[(?:\w|-)+\])?)
                            (?P<rest>.*)
                          """, re.VERBOSE )

#
# Two regular expressions to detect italic and bold markup, respectively.
# Group 1 is the markup, group 2 the rest of the line.
#
# Note that the markup is limited to words consisting of letters, digits,
# the characters `_' and `-', or an apostrophe (but not as the first
# character).
#
re_italic = re.compile( r"_((?:\w|-)(?:\w|'|-)*)_(.*)" )     #  _italic_
re_bold   = re.compile( r"\*((?:\w|-)(?:\w|'|-)*)\*(.*)" )   #  *bold*

#
# This regular expression code to identify a URL has been taken from
#
#   https://mail.python.org/pipermail/tutor/2002-September/017228.html
#
# (with slight modifications).
#
# NOTE: the module-level name `any' below shadows the built-in function of
# the same name for the rest of this module.  The name is kept as is so
# that code importing it from this module keeps working; do not call the
# built-in `any()' in this file.
#
urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
ltrs = r'\w'
gunk = r'/#~:.?+=&%@!\-'
punc = r'.:?\-'
any  = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs,
                                      'gunk' : gunk,
                                      'punc' : punc }
url  = r"""
         (
           \b                    # start at word boundary
           %(urls)s :            # need resource and a colon
           [%(any)s] +?          # followed by one or more of any valid
                                 # character, but be conservative and
                                 # take only what you need to...
           (?=                   # [look-ahead non-consumptive assertion]
             [%(punc)s]*         # either 0 or more punctuation
             (?:                 # [non-grouping parentheses]
               [^%(any)s] | $    # followed by a non-url char
                                 # or end of the string
             )
           )
         )
        """ % {'urls' : urls,
               'any'  : any,
               'punc' : punc }

re_url = re.compile( url, re.VERBOSE | re.MULTILINE )

#
# A regular expression that stops collection of comments for the current
# block.
#
re_source_sep = re.compile( r'\s*/\*\s*\*/' )   #  /* */

#
# A regular expression to find possible C identifiers while outputting
# source code verbatim, covering things like `*foo' or `(bar'.  Group 1 is
# the prefix, group 2 the identifier -- since we scan lines from left to
# right, sequentially splitting the source code into prefix and identifier
# is fully sufficient for our purposes.
#
re_source_crossref = re.compile( r'(\W*)(\w*)' )

#
# A regular expression that matches a list of reserved C source keywords.
#
# The pattern is a raw string so that `\b' and `\#' reach the `re' module
# unchanged instead of relying on Python passing unknown string escapes
# through.
#
# NOTE(review): because of the leading `\b', the `#...' alternatives can
# only match if the `#' is immediately preceded by a word character;
# confirm whether that is intended.
#
re_source_keywords = re.compile( r'''\b ( typedef   |
                                          struct    |
                                          enum      |
                                          union     |
                                          const     |
                                          char      |
                                          int       |
                                          short     |
                                          long      |
                                          void      |
                                          signed    |
                                          unsigned  |
                                          \#include |
                                          \#define  |
                                          \#undef   |
                                          \#if      |
                                          \#ifdef   |
                                          \#ifndef  |
                                          \#else    |
                                          \#endif   ) \b''', re.VERBOSE )


################################################################
##
##  SOURCE BLOCK CLASS
##
##  There are two important fields in a `SourceBlock' object.
##
##    self.lines
##      A list of text lines for the corresponding block.
##
##    self.content
##      For documentation comment blocks only, this is the block content
##      that has been `unboxed' from its decoration.
##      This is an empty list for all other blocks (i.e., sources or
##      ordinary comments with no starting markup tag).
##
class SourceBlock:
    """One block of consecutive source lines, as split off by a processor.

    Attributes:
      processor -- the object that created the block.
      filename  -- name of the file the lines come from.
      lineno    -- line number passed in by the creator.
      lines     -- a private copy of the block's text lines.
      format    -- the block format taken from `processor.format' at
                   creation time, or `None' for a normal block.
      content   -- the `unboxed' comment lines if the block uses a
                   documentation format and contains a markup tag, an
                   empty list otherwise.
    """

    def __init__( self, processor, filename, lineno, lines ):
        """Create a block from `lines' and extract its content.

           `processor' must provide a `format' attribute that is either
           `None' or an object with a compiled `column' pattern whose
           group 1 holds the text of a comment line."""
        self.processor = processor
        self.filename  = filename
        self.lineno    = lineno
        self.lines     = lines[:]
        self.format    = processor.format
        self.content   = []

        if self.format is None:
            return

        # extract comment lines; lines not matching the column pattern
        # (like the opening and closing line of the comment) are dropped
        lines = []

        for line0 in self.lines:
            m = self.format.column.match( line0 )
            if m:
                lines.append( m.group( 1 ) )

        # now, look for a markup tag; a single line starting with a tag is
        # enough to make the whole block a documentation block
        for l in lines:
            l = l.strip()
            if len( l ) > 0:
                for tag in re_markup_tags:
                    if tag.match( l ):
                        self.content = lines
                        return

    def location( self ):
        """Return the block's position as a `(filename:lineno)' string."""
        return "(" + self.filename + ":" + repr( self.lineno ) + ")"

    # debugging only -- not used in normal operations
    def dump( self ):
        """Print the block's content if it has any, else its raw lines."""
        if self.content:
            print( "{{{content start---" )
            for l in self.content:
                print( l )
            print( "---content end}}}" )
            return

        for line in self.lines:
            print( line )


################################################################
##
##  SOURCE PROCESSOR CLASS
##
##  The `SourceProcessor' is in charge of reading a C source file and
##  decomposing it into a series of different `SourceBlock' objects.
##
##  A SourceBlock object consists of the following data.
##
##    - A documentation comment block using one of the layouts above.  Its
##      exact format will be discussed later.
##
##    - Normal sources lines, including comments.
##
##
class SourceProcessor:
    """Read a source file and split it into a list of `SourceBlock's.

    Attributes:
      blocks   -- the blocks created so far, in file order.
      filename -- name of the file passed to the last `parse_file' call.
      format   -- the block format currently being collected, or `None'
                  while normal lines are collected.
      lineno   -- line number at which the most recent block start was
                  recognized (0 if there was none yet).
      lines    -- the lines accumulated for the block in progress.
    """

    def __init__( self ):
        """Initialize a source processor."""
        self.blocks   = []
        self.filename = None
        self.format   = None
        self.lineno   = 0
        self.lines    = []

    def reset( self ):
        """Reset a block processor and clean up all its blocks."""
        self.blocks = []
        self.format = None

    def parse_file( self, filename ):
        """Parse a C source file and add its blocks to the processor's
           list.

           Blocks from a previous call are discarded first.  Errors
           raised while opening or reading `filename' are propagated."""
        self.reset()

        self.filename = filename

        # `fileinput' keeps its state in the module; close a possibly
        # still active input so that `fileinput.input' can be called
        fileinput.close()
        self.format = None
        self.lineno = 0
        self.lines  = []

        try:
            for line in fileinput.input( filename ):
                # strip trailing newlines, important on Windows machines!
                if line.endswith( '\n' ):
                    line = line[:-1]

                if self.format is None:
                    self.process_normal_line( line )
                else:
                    if self.format.end.match( line ):
                        # A normal block end.  Add it to `lines' and
                        # create a new block
                        self.lines.append( line )
                        self.add_block_lines()
                    elif self.format.column.match( line ):
                        # A normal column line.  Add it to `lines'.
                        self.lines.append( line )
                    else:
                        # An unexpected block end.  Create a new block,
                        # but don't process the line.
                        self.add_block_lines()

                        # we need to process the line again
                        self.process_normal_line( line )
        finally:
            # don't leave the file open if reading was interrupted
            fileinput.close()

        # record the last lines
        self.add_block_lines()

    def process_normal_line( self, line ):
        """Process a normal line and check whether it is the start of a new
           block.

           This must only be called while `fileinput' is active, since
           the current line number is taken from it."""
        for f in re_source_block_formats:
            if f.start.match( line ):
                # flush what we have collected so far; the matching line
                # becomes the first line of the new block
                self.add_block_lines()
                self.format = f
                self.lineno = fileinput.filelineno()

        self.lines.append( line )

    def add_block_lines( self ):
        """Add the current accumulated lines and create a new block."""
        if self.lines:
            block = SourceBlock( self,
                                 self.filename,
                                 self.lineno,
                                 self.lines )

            self.blocks.append( block )
            self.format = None
            self.lines  = []

    # debugging only, not used in normal operations
    def dump( self ):
        """Print all blocks in a processor."""
        for b in self.blocks:
            b.dump()

# eof