#!/usr/bin/env python3

"""Assemble Mbed TLS change log entries into the change log file.

Add changelog entries to the first level-2 section.
Create a new level-2 section for unreleased changes if needed.
Remove the input files unless --keep-entries is specified.

In each level-3 section, entries are sorted in chronological order
(oldest first). From oldest to newest:
* Merged entry files are sorted according to their merge date (date of
  the merge commit that brought the commit that created the file into
  the target branch).
* Committed but unmerged entry files are sorted according to the date
  of the commit that adds them.
* Uncommitted entry files are sorted according to their modification time.

You must run this program from within a git working directory.
"""

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

import argparse
from collections import OrderedDict, namedtuple
import datetime
import functools
import glob
import os
import re
import subprocess
import sys

class InputFormatError(Exception):
    """An input file (changelog or entry file) is malformed.

    The message is prefixed with ``filename:line_number:``. ``message`` is
    a `str.format` template expanded with ``*args`` and ``**kwargs``.
    """
    def __init__(self, filename, line_number, message, *args, **kwargs):
        message = '{}:{}: {}'.format(filename, line_number,
                                     message.format(*args, **kwargs))
        super().__init__(message)

class CategoryParseError(Exception):
    """A version section body could not be split into categories.

    ``line_offset`` is the 0-based line, relative to the start of the parsed
    text, at which the problem was detected.
    """
    def __init__(self, line_offset, error_message):
        self.line_offset = line_offset
        self.error_message = error_message
        super().__init__('{}: {}'.format(line_offset, error_message))

class LostContent(Exception):
    """A line of an input file is missing from the generated output."""
    def __init__(self, filename, line):
        message = ('Lost content from {}: "{}"'.format(filename, line))
        super().__init__(message)

# The category names we use in the changelog.
# If you edit this, update ChangeLog.d/README.md.
STANDARD_CATEGORIES = (
    'API changes',
    'Default behavior changes',
    'Requirement changes',
    'New deprecations',
    'Removals',
    'Features',
    'Security',
    'Bugfix',
    'Changes',
)

# The maximum line length for an entry
MAX_LINE_LENGTH = 80

CategoryContent = namedtuple('CategoryContent', [
    'name', 'title_line', # Title text and line number of the title
    'body', 'body_line', # Body text and starting line number of the body
])

class ChangelogFormat:
    """Virtual class documenting how to write a changelog format class."""

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Split out the top version section.

        If the top version is already released, create a new top
        version section for an unreleased version.

        Return ``(header, top_version_title, top_version_body, trailer)``
        where the "top version" is the existing top version section if it's
        for unreleased changes, and a newly created section otherwise.
        To assemble the changelog after modifying top_version_body,
        concatenate the four pieces.
        """
        raise NotImplementedError

    @classmethod
    def version_title_text(cls, version_title):
        """Return the text of a formatted version section title."""
        raise NotImplementedError

    @classmethod
    def split_categories(cls, version_body):
        """Split a changelog version section body into categories.

        Return a list of `CategoryContent`, where the name is the category
        title without any formatting.
        """
        raise NotImplementedError

    @classmethod
    def format_category(cls, title, body):
        """Construct the text of a category section from its title and body."""
        raise NotImplementedError

class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format."""

    _unreleased_version_text = '= Mbed TLS x.x.x branch released xxxx-xx-xx'
    @classmethod
    def is_released_version(cls, title):
        """Return whether the version title carries a complete release date."""
        # Look for an incomplete release date
        return not re.search(r'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)

    _top_version_re = re.compile(r'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)
    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """A version section starts with a line starting with '='.

        Raise ValueError if the content has no version section at all.
        """
        m = re.search(cls._top_version_re, changelog_file_content)
        if m is None:
            # Without this check the failure would be an opaque
            # AttributeError on None.
            raise ValueError('No version section (line starting with "=") '
                             'found in the changelog')
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        top_version_title = m.group(1)
        top_version_body = m.group(2)
        if cls.is_released_version(top_version_title):
            # The top section is already released: insert a fresh, empty
            # unreleased section in front of it and leave it in the trailer.
            top_version_end = top_version_start
            top_version_title = cls._unreleased_version_text + '\n\n'
            top_version_body = ''
        return (changelog_file_content[:top_version_start],
                top_version_title, top_version_body,
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the first line of the title, without any line terminator."""
        # Drop everything from the first newline onwards. The subject string
        # and the flags must be passed in the right positions: re.sub takes
        # (pattern, repl, string, count, flags).
        return re.sub(r'\n.*', '', version_title, flags=re.DOTALL)

    _category_title_re = re.compile(r'(^\w.*)\n+', re.MULTILINE)
    @classmethod
    def split_categories(cls, version_body):
        """A category title is a line with the title in column 0."""
        if not version_body:
            return []
        title_matches = list(re.finditer(cls._category_title_re, version_body))
        if not title_matches or title_matches[0].start() != 0:
            # There is junk before the first category.
            raise CategoryParseError(0, 'Junk found where category expected')
        title_starts = [m.start(1) for m in title_matches]
        body_starts = [m.end(0) for m in title_matches]
        # Each body runs up to the next title, or to the end of the text.
        body_ends = title_starts[1:] + [len(version_body)]
        # Normalize every body to end with exactly one newline.
        bodies = [version_body[body_start:body_end].rstrip('\n') + '\n'
                  for (body_start, body_end) in zip(body_starts, body_ends)]
        # 0-based line numbers relative to the start of version_body.
        title_lines = [version_body[:pos].count('\n') for pos in title_starts]
        body_lines = [version_body[:pos].count('\n') for pos in body_starts]
        return [CategoryContent(title_match.group(1), title_line,
                                body, body_line)
                for title_match, title_line, body, body_line
                in zip(title_matches, title_lines, bodies, body_lines)]

    @classmethod
    def format_category(cls, title, body):
        """Return the title line followed by the body and a blank line."""
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith('\n\n'):
            body += '\n'
        return title + '\n' + body

class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(r'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(r'.*\.[A-Za-z]')
    _only_url_re = re.compile(r'^\s*\w+://\S+\s*$')
    _has_url_re = re.compile(r'.*://.*')

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file.

        ``line_offset`` is the 1-based line number in ``filename`` at which
        ``text`` starts; it is used to report accurate line numbers.
        If ``allow_unknown_category`` is false, a category that is not
        already known raises `InputFormatError`.
        Raise `InputFormatError` as well if the text cannot be split into
        categories, or if a body line is longer than MAX_LINE_LENGTH and
        is not a URL alone on its line.
        """
        try:
            categories = self.format.split_categories(text)
        except CategoryParseError as e:
            raise InputFormatError(filename, line_offset + e.line_offset,
                                   e.error_message) from e
        for category in categories:
            if not allow_unknown_category and \
               category.name not in self.categories:
                raise InputFormatError(filename,
                                       line_offset + category.title_line,
                                       'Unknown category: "{}"',
                                       category.name)

            body_split = category.body.splitlines()

            for line_number, line in enumerate(body_split, 1):
                if not self._only_url_re.match(line) and \
                   len(line) > MAX_LINE_LENGTH:
                    long_url_msg = '. URL exceeding length limit must be alone in its line.' \
                        if self._has_url_re.match(line) else ""
                    # body_line is 0-based within text and line_number is
                    # 1-based within the body, hence the "- 1".
                    raise InputFormatError(filename,
                                           line_offset + category.body_line
                                           + line_number - 1,
                                           'Line is longer than allowed: '
                                           'Length {} (Max {}){}',
                                           len(line), MAX_LINE_LENGTH,
                                           long_url_msg)

            # A category that is allowed but not yet known starts out empty.
            self.categories[category.name] = \
                self.categories.get(category.name, '') + category.body

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = ''
        # 1-based line number of the first line of the top version body.
        offset = (self.header + self.top_version_title).count('\n') + 1
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.
        """
        self.add_categories_from_text(input_stream.name, 1,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.
        """
        with open(filename, 'w', encoding='utf-8') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                # Categories without any entry are not emitted.
                if not body:
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)


@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        m = re.search('(.+)$', hashes.decode('ascii'))
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.
        Return an empty list if there is no such merge commit.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        '..'.join([some_hash, target])])
        # splitlines() yields [] for empty output, rather than [''].
        return text.decode('ascii').splitlines()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = 'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit, as a naive UTC datetime."""
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Equivalent to the deprecated datetime.utcfromtimestamp(): convert
        # in UTC, then drop the timezone to keep a naive value.
        aware = datetime.datetime.fromtimestamp(int(text),
                                                datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file."""
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        # The category comes first, so timestamps are only ever compared
        # between files of the same category.
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        return self.sort_key() < other.sort_key()


def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    Raise `LostContent` if a line of an input file is missing.
    """
    # Compare lines without their terminator: the body of a category is
    # always written with a final newline, even when the last line of the
    # entry file it came from had none.
    with open(generated_output_file, 'r', encoding='utf-8') as out_fd:
        generated_output = {line.rstrip('\n') for line in out_fd}
    with open(main_input_file, 'r', encoding='utf-8') as in_fd:
        for line in in_fd:
            if line.rstrip('\n') not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'r', encoding='utf-8') as in_fd:
            for line in in_fd:
                if line.rstrip('\n') not in generated_output:
                    raise LostContent(merged_file, line)

def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        os.rename(output_temp, output_file)

def remove_merged_entries(files_to_remove):
    """Delete the given entry files."""
    for filename in files_to_remove:
        os.remove(filename)

def list_files_to_merge(options):
    """List the entry files to merge, oldest first.

    "Oldest" is defined by `EntryFileSortKey`.
    """
    files_to_merge = glob.glob(os.path.join(options.dir, '*.txt'))
    files_to_merge.sort(key=EntryFileSortKey)
    return files_to_merge

def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Read the changelog file from options.input.
    Read entries to merge from the directory options.dir.
    Write the new changelog to options.output.
    Remove the merged entries if options.keep_entries is false.
    """
    with open(options.input, 'r', encoding='utf-8') as input_file:
        changelog = ChangeLog(input_file, TextChangelogFormat)
    files_to_merge = list_files_to_merge(options)
    if not files_to_merge:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for filename in files_to_merge:
        with open(filename, 'r', encoding='utf-8') as input_file:
            changelog.add_file(input_file)
    finish_output(changelog, options.output, options.input, files_to_merge)
    if not options.keep_entries:
        remove_merged_entries(files_to_merge)

def show_file_timestamps(options):
    """List the files to merge and their timestamp.

    This is only intended for debugging purposes.
    """
    files = list_files_to_merge(options)
    for filename in files:
        ts = EntryFileSortKey(filename)
        print(ts.category, ts.datetime, filename)

def set_defaults(options):
    """Add default values for missing options."""
    output_file = getattr(options, 'output', None)
    if output_file is None:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        # Keep the entry files only when writing to a separate output file.
        options.keep_entries = (output_file is not None)

def main():
    """Command line entry point."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--dir', '-d', metavar='DIR',
                        default='ChangeLog.d',
                        help='Directory to read entries from'
                             ' (default: ChangeLog.d)')
    parser.add_argument('--input', '-i', metavar='FILE',
                        default='ChangeLog',
                        help='Existing changelog file to read from and augment'
                             ' (default: ChangeLog)')
    parser.add_argument('--keep-entries',
                        action='store_true', dest='keep_entries', default=None,
                        help='Keep the files containing entries'
                             ' (default: remove them if --output/-o is not specified)')
    parser.add_argument('--no-keep-entries',
                        action='store_false', dest='keep_entries',
                        help='Remove the files containing entries after they are merged'
                             ' (default: remove them if --output/-o is not specified)')
    parser.add_argument('--output', '-o', metavar='FILE',
                        help='Output changelog file'
                             ' (default: overwrite the input)')
    parser.add_argument('--list-files-only',
                        action='store_true',
                        help=('Only list the files that would be processed '
                              '(with some debugging information)'))
    options = parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
        return
    merge_entries(options)

if __name__ == '__main__':
    main()