# Copyright 2017 The Abseil Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Internal helper functions for Abseil Python flags library."""

import collections
import os
import re
import struct
import sys
import textwrap
try:
  import fcntl
except ImportError:
  fcntl = None
try:
  # Importing termios will fail on non-unix platforms.
  import termios
except ImportError:
  termios = None


_DEFAULT_HELP_WIDTH = 80  # Default width of help output.
# Minimal "sane" width of help output. We assume that any value below 40 is
# unreasonable.
_MIN_HELP_WIDTH = 40

# Define the allowed error rate in an input string to get suggestions.
#
# We lean towards a high threshold because we tend to be matching a phrase,
# and the simple algorithm used here is geared towards correcting word
# spellings.
#
# For manual testing, consider "<command> --list" which produced a large number
# of spurious suggestions when we used "least_errors > 0.5" instead of
# "least_errors >= 0.5".
_SUGGESTION_ERROR_RATE_THRESHOLD = 0.50

# Characters that cannot appear or are highly discouraged in an XML 1.0
# document. (See http://www.w3.org/TR/REC-xml/#charsets or
# https://en.wikipedia.org/wiki/Valid_characters_in_XML#XML_1.0)
_ILLEGAL_XML_CHARS_REGEX = re.compile(
    u'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x84\x86-\x9f\ud800-\udfff\ufffe\uffff]')

# This is a set of module ids for the modules that disclaim key flags.
# This module is explicitly added to this set so that we never consider it to
# define key flag.
disclaim_module_ids = set([id(sys.modules[__name__])])


# Define special flags here so that help may be generated for them.
# NOTE: Please do NOT use SPECIAL_FLAGS from outside flags module.
# Initialized inside flagvalues.py.
SPECIAL_FLAGS = None


# This points to the flags module, initialized in flags/__init__.py.
# This should only be used in adopt_module_key_flags to take SPECIAL_FLAGS into
# account.
FLAGS_MODULE = None


class _ModuleObjectAndName(
    collections.namedtuple('_ModuleObjectAndName', 'module module_name')):
  """Module object and name.

  Fields:
  - module: object, module object.
  - module_name: str, module name.
  """


def get_module_object_and_name(globals_dict):
  """Returns the module that defines a global environment, and its name.

  Args:
    globals_dict: A dictionary that should correspond to an environment
      providing the values of the globals.

  Returns:
    _ModuleObjectAndName - pair of module object & module name.
    Returns (None, None) if the module could not be identified.
  """
  name = globals_dict.get('__name__', None)
  module = sys.modules.get(name, None)
  # Pick a more informative name for the main module.
  return _ModuleObjectAndName(module,
                              (sys.argv[0] if name == '__main__' else name))


def get_calling_module_object_and_name():
  """Returns the module that's calling into this module.

  We generally use this function to get the name of the module calling a
  DEFINE_foo... function.

  The call stack is walked outwards from the caller of this function; the
  first frame whose module is not listed in disclaim_module_ids and whose
  module name is known wins.

  Returns:
    _ModuleObjectAndName - pair of module object & module name for the
    calling module.

  Raises:
    AssertionError: Raised when no calling module could be identified.
  """
  for depth in range(1, sys.getrecursionlimit()):
    try:
      # sys._getframe is the right thing to use here, as it's the best
      # way to walk up the call stack.
      globals_for_frame = sys._getframe(depth).f_globals  # pylint: disable=protected-access
    except ValueError:
      # sys._getframe raises ValueError once depth goes past the outermost
      # frame. Stop walking so that the documented AssertionError is raised
      # instead of leaking the ValueError.
      break
    module, module_name = get_module_object_and_name(globals_for_frame)
    if id(module) not in disclaim_module_ids and module_name is not None:
      return _ModuleObjectAndName(module, module_name)
  raise AssertionError('No module was found')


def get_calling_module():
  """Returns the name of the module that's calling into this module."""
  return get_calling_module_object_and_name().module_name


def create_xml_dom_element(doc, name, value):
  """Returns an XML DOM element with name and text value.

  Args:
    doc: minidom.Document, the DOM document it should create nodes from.
    name: str, the tag of XML element.
    value: object, whose string representation will be used
      as the value of the XML element. Illegal or highly discouraged xml 1.0
      characters are stripped.

  Returns:
    An instance of minidom.Element.
  """
  s = str(value)
  if isinstance(value, bool):
    # Display boolean values as the C++ flag library does: no caps.
    s = s.lower()
  # Remove illegal xml characters.
  s = _ILLEGAL_XML_CHARS_REGEX.sub(u'', s)

  e = doc.createElement(name)
  e.appendChild(doc.createTextNode(s))
  return e


def get_help_width():
  """Returns the integer width of help lines that is used in TextWrap.

  The width is taken from the terminal attached to stdout when it reports at
  least _MIN_HELP_WIDTH columns, otherwise from the COLUMNS environment
  variable. _DEFAULT_HELP_WIDTH is returned when stdout is not a tty, when
  fcntl/termios are unavailable, or when neither source yields a usable
  integer.
  """
  if not sys.stdout.isatty() or termios is None or fcntl is None:
    return _DEFAULT_HELP_WIDTH
  try:
    # The 4-byte buffer only reserves room for the two shorts unpacked below;
    # its content is irrelevant. A bytes literal is used so the same buffer
    # type is passed on both Python 2 and Python 3.
    data = fcntl.ioctl(sys.stdout, termios.TIOCGWINSZ, b'1234')
    columns = struct.unpack('hh', data)[1]
    # Emacs mode returns 0.
    # Here we assume that any value below 40 is unreasonable.
    if columns >= _MIN_HELP_WIDTH:
      return columns
    # Returning an int as default is fine, int(int) just return the int.
    return int(os.getenv('COLUMNS', _DEFAULT_HELP_WIDTH))

  except (TypeError, ValueError, IOError, struct.error):
    # ValueError covers an empty or non-numeric COLUMNS value.
    return _DEFAULT_HELP_WIDTH


def get_flag_suggestions(attempt, longopt_list):
  """Returns helpful similar matches for an invalid flag.

  Args:
    attempt: str, the flag name that was typed.
    longopt_list: list of str, known long option names; anything from the
      first '=' onwards in an entry is ignored.

  Returns:
    A list of the option names closest to attempt (all candidates tied for the
    smallest edit distance), or an empty list if attempt is too short, no
    options are given, or the best match is too far off.
  """
  # Don't suggest on very short strings, or if no longopts are specified.
  if len(attempt) <= 2 or not longopt_list:
    return []

  option_names = [v.split('=')[0] for v in longopt_list]

  # Find close approximations in flag prefixes.
  # This also handles the case where the flag is spelled right but ambiguous.
  distances = [(_damerau_levenshtein(attempt, option[0:len(attempt)]), option)
               for option in option_names]
  # t[0] is distance, and sorting by t[1] allows us to have stable output.
  distances.sort()

  least_errors, _ = distances[0]
  # Don't suggest excessively bad matches.
  if least_errors >= _SUGGESTION_ERROR_RATE_THRESHOLD * len(attempt):
    return []

  suggestions = []
  for errors, name in distances:
    if errors == least_errors:
      suggestions.append(name)
    else:
      # distances is sorted, so nothing after this point can tie.
      break
  return suggestions


def _damerau_levenshtein(a, b):
  """Returns Damerau-Levenshtein edit distance from a to b."""
  # Maps (x, y) suffix pairs to their already computed distance.
  memo = {}

  def distance(x, y):
    """Recursively defined string distance with memoization."""
    if (x, y) in memo:
      return memo[x, y]
    if not x:
      d = len(y)
    elif not y:
      d = len(x)
    else:
      d = min(
          distance(x[1:], y) + 1,  # correct an insertion error
          distance(x, y[1:]) + 1,  # correct a deletion error
          distance(x[1:], y[1:]) + (x[0] != y[0]))  # correct a wrong character
      if len(x) >= 2 and len(y) >= 2 and x[0] == y[1] and x[1] == y[0]:
        # Correct a transposition.
        t = distance(x[2:], y[2:]) + 1
        if d > t:
          d = t

    memo[x, y] = d
    return d
  return distance(a, b)


def text_wrap(text, length=None, indent='', firstline_indent=None):
  """Wraps a given text to a maximum line length and returns it.

  It turns lines that only contain whitespace into empty lines, keeps new lines,
  and expands tabs using 4 spaces.

  Args:
    text: str, text to wrap.
    length: int, maximum length of a line, includes indentation.
      If this is None then use get_help_width()
    indent: str, indent for all but first line.
    firstline_indent: str, indent for first line; if None, fall back to indent.

  Returns:
    str, the wrapped text.

  Raises:
    ValueError: Raised if indent or firstline_indent not shorter than length.
  """
  # Get defaults where caller used None
  if length is None:
    length = get_help_width()
  if indent is None:
    indent = ''
  if firstline_indent is None:
    firstline_indent = indent

  if len(indent) >= length:
    raise ValueError('Length of indent exceeds length')
  if len(firstline_indent) >= length:
    raise ValueError('Length of first line indent exceeds length')

  text = text.expandtabs(4)

  result = []
  # Create one wrapper for the first paragraph and one for subsequent
  # paragraphs that does not have the initial wrapping.
  wrapper = textwrap.TextWrapper(
      width=length, initial_indent=firstline_indent, subsequent_indent=indent)
  subsequent_wrapper = textwrap.TextWrapper(
      width=length, initial_indent=indent, subsequent_indent=indent)

  # textwrap does not have any special treatment for newlines. From the docs:
  # "...newlines may appear in the middle of a line and cause strange output.
  # For this reason, text should be split into paragraphs (using
  # str.splitlines() or similar) which are wrapped separately."
  for paragraph in (p.strip() for p in text.splitlines()):
    if paragraph:
      result.extend(wrapper.wrap(paragraph))
    else:
      result.append('')  # Keep empty lines.
    # Replace initial wrapper with wrapper for subsequent paragraphs.
    wrapper = subsequent_wrapper

  return '\n'.join(result)


def flag_dict_to_args(flag_map, multi_flags=None):
  """Convert a dict of values into process call parameters.

  This method is used to convert a dictionary into a sequence of parameters
  for a binary that parses arguments using this module.

  Args:
    flag_map: dict, a mapping where the keys are flag names (strings).
      values are treated according to their type:

      * If value is ``None``, then only the name is emitted.
      * If value is ``True``, then only the name is emitted.
      * If value is ``False``, then only the name prepended with 'no' is
        emitted.
      * If value is a string then ``--name=value`` is emitted.
      * If value is a collection, this will emit
        ``--name=value1,value2,value3``, unless the flag name is in
        ``multi_flags``, in which case this will emit
        ``--name=value1 --name=value2 --name=value3``.
      * Everything else is converted to string and passed as such.

    multi_flags: set, names (strings) of flags that should be treated as
      multi-flags.
  Yields:
    sequence of string suitable for a subprocess execution.
  """
  for key, value in flag_map.items():
    if value is None:
      yield '--%s' % key
    elif isinstance(value, bool):
      if value:
        yield '--%s' % key
      else:
        yield '--no%s' % key
    elif isinstance(value, (bytes, type(u''))):
      # We don't want strings to be handled like python collections.
      yield '--%s=%s' % (key, value)
    else:
      # Now we attempt to deal with collections.
      try:
        if multi_flags and key in multi_flags:
          for item in value:
            yield '--%s=%s' % (key, str(item))
        else:
          yield '--%s=%s' % (key, ','.join(str(item) for item in value))
      except TypeError:
        # Default case: value is not iterable, emit its string form.
        yield '--%s=%s' % (key, value)


def trim_docstring(docstring):
  """Removes indentation from triple-quoted strings.

  This is the function specified in PEP 257 to handle docstrings:
  https://www.python.org/dev/peps/pep-0257/.

  Args:
    docstring: str, a python docstring.

  Returns:
    str, docstring with indentation removed.
  """
  if not docstring:
    return ''

  # If you've got a line longer than this you have other problems...
  max_indent = 1 << 29

  # Convert tabs to spaces (following the normal Python rules)
  # and split into a list of lines:
  lines = docstring.expandtabs().splitlines()

  # Determine minimum indentation (first line doesn't count):
  indent = max_indent
  for line in lines[1:]:
    stripped = line.lstrip()
    if stripped:
      indent = min(indent, len(line) - len(stripped))
  # Remove indentation (first line is special):
  trimmed = [lines[0].strip()]
  if indent < max_indent:
    for line in lines[1:]:
      trimmed.append(line[indent:].rstrip())
  # Strip off trailing and leading blank lines:
  while trimmed and not trimmed[-1]:
    trimmed.pop()
  while trimmed and not trimmed[0]:
    trimmed.pop(0)
  # Return a single string:
  return '\n'.join(trimmed)


def doc_to_help(doc):
  """Takes a __doc__ string and reformats it as help.

  Args:
    doc: str, the docstring to reformat.

  Returns:
    str, the docstring with common indentation removed and single newlines
    between non-whitespace characters replaced by a space; blank lines and
    indented continuation lines are kept.
  """

  # Get rid of starting and ending white space. Using lstrip() or even
  # strip() could drop more than maximum of first line and right space
  # of last line.
  doc = doc.strip()

  # Turn lines that contain only spaces/tabs into truly empty lines.
  whitespace_only_line = re.compile('^[ \t]+$', re.M)
  doc = whitespace_only_line.sub('', doc)

  # Cut out common space at line beginnings.
  doc = trim_docstring(doc)

  # Just like this module's comment, comments tend to be aligned somehow.
  # In other words they all start with the same amount of white space.
  # 1) keep double new lines;
  # 2) keep ws after new lines if not empty line;
  # 3) all other new lines shall be changed to a space;
  # Solution: Match new lines between non white space and replace with space.
  doc = re.sub(r'(?<=\S)\n(?=\S)', ' ', doc, flags=re.M)

  return doc