• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2017 The Abseil Authors.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Internal helper functions for Abseil Python flags library."""
16
17import collections
18import os
19import re
20import struct
21import sys
22import textwrap
23try:
24  import fcntl
25except ImportError:
26  fcntl = None
27try:
28  # Importing termios will fail on non-unix platforms.
29  import termios
30except ImportError:
31  termios = None
32
33
_DEFAULT_HELP_WIDTH = 80  # Default width of help output.
# Minimal "sane" width of help output. We assume that any value below 40 is
# unreasonable.
_MIN_HELP_WIDTH = 40

# Define the allowed error rate in an input string to get suggestions.
#
# We lean towards a high threshold because we tend to be matching a phrase,
# and the simple algorithm used here is geared towards correcting word
# spellings.
#
# For manual testing, consider "<command> --list" which produced a large number
# of spurious suggestions when we used "least_errors > 0.5" instead of
# "least_errors >= 0.5".
_SUGGESTION_ERROR_RATE_THRESHOLD = 0.50

# Characters that cannot appear or are highly discouraged in an XML 1.0
# document. (See http://www.w3.org/TR/REC-xml/#charsets or
# https://en.wikipedia.org/wiki/Valid_characters_in_XML#XML_1.0)
_ILLEGAL_XML_CHARS_REGEX = re.compile(
    u'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x84\x86-\x9f\ud800-\udfff\ufffe\uffff]')

# This is a set of module ids for the modules that disclaim key flags.
# This module is explicitly added to this set so that we never consider it to
# define key flags.
disclaim_module_ids = set([id(sys.modules[__name__])])


# Define special flags here so that help may be generated for them.
# NOTE: Please do NOT use SPECIAL_FLAGS from outside flags module.
# Initialized inside flagvalues.py.
SPECIAL_FLAGS = None


# This points to the flags module, initialized in flags/__init__.py.
# This should only be used in adopt_module_key_flags to take SPECIAL_FLAGS into
# account.
FLAGS_MODULE = None
72
73
74class _ModuleObjectAndName(
75    collections.namedtuple('_ModuleObjectAndName', 'module module_name')):
76  """Module object and name.
77
78  Fields:
79  - module: object, module object.
80  - module_name: str, module name.
81  """
82
83
def get_module_object_and_name(globals_dict):
  """Looks up the module that owns a globals dictionary, and its name.

  Args:
    globals_dict: A dictionary that should correspond to an environment
      providing the values of the globals.

  Returns:
    _ModuleObjectAndName - pair of module object & module name.
    The module object is None when the '__name__' found in globals_dict is
    not a key of sys.modules; the result is (None, None) when globals_dict
    has no '__name__' entry at all.
  """
  module_name = globals_dict.get('__name__')
  module_object = sys.modules.get(module_name)
  if module_name == '__main__':
    # The script path is more informative than the literal '__main__'.
    module_name = sys.argv[0]
  return _ModuleObjectAndName(module_object, module_name)
100
101
def get_calling_module_object_and_name():
  """Returns the module that's calling into this module.

  We generally use this function to get the name of the module calling a
  DEFINE_foo... function.

  The call stack is walked outwards, starting at the caller of this function,
  until a frame is found whose module is not listed in disclaim_module_ids and
  whose globals provide a module name.

  Returns:
    _ModuleObjectAndName - the module object that called into this one,
    together with its name.

  Raises:
    AssertionError: Raised when no calling module could be identified.
  """
  # sys._getframe is the right thing to use here, as it's the best way to get
  # hold of the call stack. Following f_back links (instead of asking for
  # sys._getframe(depth) with an ever growing depth) ends cleanly with None at
  # the outermost frame, so running off the stack produces the documented
  # AssertionError rather than a ValueError from sys._getframe.
  frame = sys._getframe().f_back  # pylint: disable=protected-access
  while frame is not None:
    module, module_name = get_module_object_and_name(frame.f_globals)
    if id(module) not in disclaim_module_ids and module_name is not None:
      return _ModuleObjectAndName(module, module_name)
    frame = frame.f_back
  raise AssertionError('No module was found')
122
123
def get_calling_module():
  """Returns only the name part of get_calling_module_object_and_name()."""
  caller = get_calling_module_object_and_name()
  return caller.module_name
127
128
def create_xml_dom_element(doc, name, value):
  """Builds an XML DOM element holding a single text node.

  Args:
    doc: minidom.Document, the DOM document used to create the nodes.
    name: str, the tag of the XML element.
    value: object, its string representation becomes the element's text.
        Booleans are written in lower case, and characters that are illegal
        or highly discouraged in XML 1.0 are removed.

  Returns:
    An instance of minidom.Element.
  """
  raw_text = str(value)
  # The C++ flag library prints booleans without caps; do the same.
  text = raw_text.lower() if isinstance(value, bool) else raw_text
  cleaned_text = _ILLEGAL_XML_CHARS_REGEX.sub(u'', text)

  element = doc.createElement(name)
  element.appendChild(doc.createTextNode(cleaned_text))
  return element
152
153
def get_help_width():
  """Returns the integer width of help lines that is used in TextWrap.

  The width is taken from the terminal attached to sys.stdout when possible.
  _DEFAULT_HELP_WIDTH is returned when stdout is not a tty, when the termios
  or fcntl modules are unavailable, or when the terminal size cannot be
  determined.

  Returns:
    int, the number of columns to wrap help output to.
  """
  if not sys.stdout.isatty() or termios is None or fcntl is None:
    return _DEFAULT_HELP_WIDTH
  try:
    # '1234' is only a 4 byte placeholder buffer for the ioctl result, which
    # is unpacked as two shorts; the second one is the column count.
    data = fcntl.ioctl(sys.stdout, termios.TIOCGWINSZ, '1234')
    columns = struct.unpack('hh', data)[1]
    # Emacs mode returns 0.
    # Here we assume that any value below 40 is unreasonable.
    if columns >= _MIN_HELP_WIDTH:
      return columns
    # Returning an int as default is fine, int(int) just return the int.
    return int(os.getenv('COLUMNS', _DEFAULT_HELP_WIDTH))

  except (TypeError, ValueError, IOError, struct.error):
    # ValueError covers a COLUMNS environment variable that is not a number
    # (for example an empty string): fall back to the default width instead
    # of failing while printing help.
    return _DEFAULT_HELP_WIDTH
170
171
def get_flag_suggestions(attempt, longopt_list):
  """Returns the known flag names that most closely resemble an invalid flag.

  Args:
    attempt: str, the flag name that was not recognized.
    longopt_list: list of str, known long options; anything from the first
        '=' onwards is ignored.

  Returns:
    A list of the option names whose prefix (cut to the length of attempt) has
    the smallest edit distance to attempt, in sorted order. The list is empty
    when attempt has at most two characters, when there are no options, or
    when even the best match is too far off.
  """
  # Very short strings and an empty option list give nothing to suggest.
  if len(attempt) <= 2 or not longopt_list:
    return []

  names = [longopt.split('=')[0] for longopt in longopt_list]

  # Compare against flag prefixes only. This also handles the case where the
  # flag is spelled right but ambiguous.
  prefix_length = len(attempt)
  # Sorting the (distance, name) pairs puts the best matches first and orders
  # equally good matches by name, which keeps the output stable.
  scored = sorted(
      [(_damerau_levenshtein(attempt, name[:prefix_length]), name)
       for name in names])

  best_score = scored[0][0]
  # Excessively bad matches are not worth suggesting.
  if best_score >= _SUGGESTION_ERROR_RATE_THRESHOLD * prefix_length:
    return []

  # The list is sorted, so the best matches form its leading run.
  return [name for score, name in scored if score == best_score]
199
200
201def _damerau_levenshtein(a, b):
202  """Returns Damerau-Levenshtein edit distance from a to b."""
203  memo = {}
204
205  def distance(x, y):
206    """Recursively defined string distance with memoization."""
207    if (x, y) in memo:
208      return memo[x, y]
209    if not x:
210      d = len(y)
211    elif not y:
212      d = len(x)
213    else:
214      d = min(
215          distance(x[1:], y) + 1,  # correct an insertion error
216          distance(x, y[1:]) + 1,  # correct a deletion error
217          distance(x[1:], y[1:]) + (x[0] != y[0]))  # correct a wrong character
218      if len(x) >= 2 and len(y) >= 2 and x[0] == y[1] and x[1] == y[0]:
219        # Correct a transposition.
220        t = distance(x[2:], y[2:]) + 1
221        if d > t:
222          d = t
223
224    memo[x, y] = d
225    return d
226  return distance(a, b)
227
228
def text_wrap(text, length=None, indent='', firstline_indent=None):
  """Wraps text to a maximum line length, one input line at a time.

  Every line of the input is treated as its own paragraph: it is stripped,
  wrapped, and kept separate from its neighbours. Lines that only contain
  whitespace become empty lines, and tabs are expanded using 4 spaces.

  Args:
    text: str, text to wrap.
    length: int, maximum length of a line, includes indentation.
        If this is None then use get_help_width()
    indent: str, indent for all but first line.
    firstline_indent: str, indent for first line; if None, fall back to indent.

  Returns:
    str, the wrapped text.

  Raises:
    ValueError: Raised if indent or firstline_indent not shorter than length.
  """
  # Fill in defaults where the caller passed None.
  length = get_help_width() if length is None else length
  indent = '' if indent is None else indent
  firstline_indent = indent if firstline_indent is None else firstline_indent

  if len(indent) >= length:
    raise ValueError('Length of indent exceeds length')
  if len(firstline_indent) >= length:
    raise ValueError('Length of first line indent exceeds length')

  # Only the very first input line gets the first-line indent; every later
  # line starts with the regular indent.
  first_wrapper = textwrap.TextWrapper(
      width=length, initial_indent=firstline_indent, subsequent_indent=indent)
  later_wrapper = textwrap.TextWrapper(
      width=length, initial_indent=indent, subsequent_indent=indent)

  # textwrap has no special treatment for newlines, and its documentation
  # recommends splitting text into paragraphs that are wrapped separately.
  wrapped = []
  for index, raw_line in enumerate(text.expandtabs(4).splitlines()):
    paragraph = raw_line.strip()
    if not paragraph:
      wrapped.append('')  # Keep empty lines.
      continue
    active_wrapper = first_wrapper if index == 0 else later_wrapper
    wrapped.extend(active_wrapper.wrap(paragraph))

  return '\n'.join(wrapped)
284
285
def flag_dict_to_args(flag_map, multi_flags=None):
  """Turns a dict of flag values into command line arguments.

  The generated arguments are meant for a binary that parses its arguments
  using this module.

  Args:
    flag_map: dict, a mapping where the keys are flag names (strings).
        values are treated according to their type:

        * If value is ``None``, then only the name is emitted.
        * If value is ``True``, then only the name is emitted.
        * If value is ``False``, then only the name prepended with 'no' is
          emitted.
        * If value is a string then ``--name=value`` is emitted.
        * If value is a collection, this will emit
          ``--name=value1,value2,value3``, unless the flag name is in
          ``multi_flags``, in which case this will emit
          ``--name=value1 --name=value2 --name=value3``.
        * Everything else is converted to string and passed as such.

    multi_flags: set, names (strings) of flags that should be treated as
        multi-flags.
  Yields:
    sequence of string suitable for a subprocess execution.
  """
  for name, flag_value in flag_map.items():
    if flag_value is None or flag_value is True:
      yield '--%s' % name
      continue
    if flag_value is False:
      yield '--no%s' % name
      continue
    if isinstance(flag_value, (bytes, type(u''))):
      # Strings are iterable, but must not be handled like collections.
      yield '--%s=%s' % (name, flag_value)
      continue

    # Whatever is left is first tried as a collection.
    try:
      if multi_flags and name in multi_flags:
        for rendered in map(str, flag_value):
          yield '--%s=%s' % (name, rendered)
      else:
        yield '--%s=%s' % (name, ','.join(map(str, flag_value)))
    except TypeError:
      # Not iterable after all: emit the value as it is.
      yield '--%s=%s' % (name, flag_value)
334
335
def trim_docstring(docstring):
  """Strips the common indentation from a triple-quoted string.

  This follows the docstring handling algorithm given in PEP 257:
  https://www.python.org/dev/peps/pep-0257/.

  Args:
    docstring: str, a python docstring.

  Returns:
    str, docstring with indentation removed, and without blank lines at its
    start and end. An empty (or None) docstring gives ''.
  """
  if not docstring:
    return ''

  # Sentinel meaning "no indented line seen"; no real line is this long.
  max_indent = 1 << 29

  # Tabs become spaces (following the normal Python rules) before splitting.
  lines = docstring.expandtabs().splitlines()
  first_line, other_lines = lines[0], lines[1:]

  # The first line does not take part in finding the common indentation, and
  # neither do blank lines.
  margins = [len(text) - len(text.lstrip())
             for text in other_lines if text.lstrip()]
  margin = min(margins + [max_indent])

  trimmed = [first_line.strip()]
  if margin < max_indent:
    trimmed.extend(text[margin:].rstrip() for text in other_lines)

  # Leave out the blank lines at the end and at the start.
  start, stop = 0, len(trimmed)
  while stop > start and not trimmed[stop - 1]:
    stop -= 1
  while start < stop and not trimmed[start]:
    start += 1

  return '\n'.join(trimmed[start:stop])
376
377
def doc_to_help(doc):
  """Reformats a __doc__ string so that it can be shown as help text."""

  # Drop the white space around the whole text.
  stripped = doc.strip()

  # Lines made of spaces and tabs only are turned into empty lines.
  blanked = re.sub('^[ \t]+$', '', stripped, flags=re.M)

  # Cut out the white space that the lines have in common at their beginning.
  dedented = trim_docstring(blanked)

  # Comments tend to be aligned, i.e. their lines all start with the same
  # amount of white space. What remains to be done:
  # 1) keep double new lines;
  # 2) keep white space after a new line, unless the line is empty;
  # 3) change every other new line to a space.
  # All of this is achieved by replacing exactly those new lines that sit
  # directly between two non white space characters.
  return re.sub(r'(?<=\S)\n(?=\S)', ' ', dedented, flags=re.M)
402