• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# pep8.py - Check Python source code formatting, according to PEP 8
3# Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net>
4#
5# Permission is hereby granted, free of charge, to any person
6# obtaining a copy of this software and associated documentation files
7# (the "Software"), to deal in the Software without restriction,
8# including without limitation the rights to use, copy, modify, merge,
9# publish, distribute, sublicense, and/or sell copies of the Software,
10# and to permit persons to whom the Software is furnished to do so,
11# subject to the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23# SOFTWARE.
24
25"""
26Check Python source code formatting, according to PEP 8:
27http://www.python.org/dev/peps/pep-0008/
28
29For usage and a list of options, try this:
30$ python pep8.py -h
31
32This program and its regression test suite live here:
33http://github.com/jcrocholl/pep8
34
35Groups of errors and warnings:
36E errors
37W warnings
38100 indentation
39200 whitespace
40300 blank lines
41400 imports
42500 line length
43600 deprecation
44700 statements
45
46You can add checks to this program by writing plugins. Each plugin is
47a simple function that is called for each line of source code, either
48physical or logical.
49
50Physical line:
51- Raw line of text from the input file.
52
53Logical line:
54- Multi-line statements converted to a single line.
55- Stripped left and right.
56- Contents of strings replaced with 'xxx' of same length.
57- Comments removed.
58
59The check function requests physical or logical lines by the name of
60the first argument:
61
62def maximum_line_length(physical_line)
63def extraneous_whitespace(logical_line)
64def blank_lines(logical_line, blank_lines, indent_level, line_number)
65
66The last example above demonstrates how check plugins can request
67additional information with extra arguments. All attributes of the
68Checker object are available. Some examples:
69
70lines: a list of the raw lines from the input file
71tokens: the tokens that contribute to this logical line
72line_number: line number in the input file
73blank_lines: blank lines before this one
74indent_char: first indentation character in this file (' ' or '\t')
75indent_level: indentation (with tabs expanded to multiples of 8)
76previous_indent_level: indentation on previous line
77previous_logical: previous logical line
78
79The docstring of each check function shall be the relevant part of
80text from PEP 8. It is printed if the user enables --show-pep8.
81Several docstrings contain examples directly from the PEP 8 document.
82
83Okay: spam(ham[1], {eggs: 2})
84E201: spam( ham[1], {eggs: 2})
85
86These examples are verified automatically when pep8.py is run with the
87--doctest option. You can add examples for your own check functions.
88The format is simple: "Okay" or error/warning code followed by colon
89and space, the rest of the line is example source code. If you put 'r'
90before the docstring, you can use \n for newline, \t for tab and \s
91for space.
92
93"""
94
95__version__ = '0.5.0'
96
97import os
98import sys
99import re
100import time
101import inspect
102import tokenize
103from optparse import OptionParser
104from keyword import iskeyword
105from fnmatch import fnmatch
106
# Comma-separated directory names of version-control metadata.
# NOTE(review): presumably the default for an exclude option; the consumer
# is not visible in this part of the file.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'
# Error-code prefixes ignored by default.  'E24' covers E241/E242, which
# whitespace_around_comma documents as "disabled by default".
DEFAULT_IGNORE = ['E24']

# Captures the leading run of spaces and tabs of a physical line.
INDENT_REGEX = re.compile(r'([ \t]*)')
# Matches the old "raise Name, ..." form; group 1 is the comma.
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
# Matches a docstring example line: "Okay" or an error code, a colon, one
# whitespace character, then the example source.
SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
# Matches an error/warning code such as E501 or W291.
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
# Previous logical lines that exempt a nested def/class from E301:
# a class or def header, or a line starting with a string literal.
E301NOT_REGEX = re.compile(r'class |def |u?r?["\']')

# Characters treated as horizontal whitespace by the logical-line checks.
WHITESPACE = ' \t'

# Operators that must be surrounded by spaces (see E225).
BINARY_OPERATORS = ['**=', '*=', '+=', '-=', '!=', '<>',
    '%=', '^=', '&=', '|=', '==', '/=', '//=', '>=', '<=', '>>=', '<<=',
    '%',  '^',  '&',  '|',  '=',  '/',  '//',  '>',  '<',  '>>',  '<<']
# Operators that may be unary (or argument unpacking) depending on context.
UNARY_OPERATORS = ['**', '*', '+', '-']
# whitespace_around_operator iterates this list in order, so the order
# determines which operator is reported first.
OPERATORS = BINARY_OPERATORS + UNARY_OPERATORS

# Module-wide option state read by Checker and the check framework
# (counters, verbose, physical_checks, logical_checks).  Both names are
# assigned elsewhere; the assignment is not visible in this part of the file.
options = None
args = None
126
127
128##############################################################################
129# Plugins (check functions) for physical lines
130##############################################################################
131
132
def tabs_or_spaces(physical_line, indent_char):
    r"""
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively.  When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces.  When using -tt
    these warnings become errors.  These options are highly recommended!

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    # The pattern matches zero or more characters, so match() never fails.
    leading = INDENT_REGEX.match(physical_line).group(1)
    # Report the first indentation character that differs from the
    # indentation character established for this file.
    for position, char in enumerate(leading):
        if char == indent_char:
            continue
        return position, "E101 indentation contains mixed spaces and tabs"
    return None
151
152
def tabs_obsolete(physical_line):
    r"""
    For new projects, spaces-only are strongly recommended over tabs.  Most
    editors have features that make this easy to do.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    leading = INDENT_REGEX.match(physical_line).group(1)
    # Offset of the first tab inside the indentation, or -1 if there is none.
    first_tab = leading.find('\t')
    if first_tab >= 0:
        return first_tab, "W191 indentation contains tabs"
    return None
164
165
def trailing_whitespace(physical_line):
    r"""
    JCR: Trailing whitespace is superfluous.

    Okay: spam(1)
    W291: spam(1)\s
    """
    # The docstring is raw because "\s" is not a valid escape sequence;
    # the resulting string value is unchanged.
    #
    # Line terminators and a trailing form feed are not reported, so strip
    # them first, in this order: newline, carriage return, form feed (^L).
    physical_line = physical_line.rstrip('\n')    # chr(10), newline
    physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
    physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
    stripped = physical_line.rstrip()
    if physical_line != stripped:
        # The offset points at the first trailing whitespace character.
        return len(stripped), "W291 trailing whitespace"
    return None
179
180
def trailing_blank_lines(physical_line, lines, line_number):
    r"""
    JCR: Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n
    """
    # Only the very last physical line of the file can trigger this check.
    is_last_line = line_number == len(lines)
    if is_last_line and not physical_line.strip():
        return 0, "W391 blank line at end of file"
    return None
190
191
def missing_newline(physical_line):
    """
    JCR: The last line should have a newline.
    """
    # rstrip() only removes characters from the right, so an unchanged
    # length means the line has no trailing whitespace, newline included.
    stripped = physical_line.rstrip()
    if len(stripped) == len(physical_line):
        return len(physical_line), "W292 no newline at end of file"
    return None
198
199
def maximum_line_length(physical_line):
    """
    Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side.  The default wrapping on such devices looks
    ugly.  Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.
    """
    # Trailing whitespace and the line terminator do not count.
    width = len(physical_line.rstrip())
    if width <= 79:
        return None
    return 79, "E501 line too long (%d characters)" % width
214
215
216##############################################################################
217# Plugins (check functions) for logical lines
218##############################################################################
219
220
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                previous_logical, blank_lines_before_comment):
    r"""
    Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions.  Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    """
    if line_number == 1:
        return None  # Don't expect blank lines before the first line
    # Blank lines counted before a preceding comment block also count.
    gap = max(blank_lines, blank_lines_before_comment)

    # Nothing may separate a decorator from what it decorates; no other
    # rule applies to a line that follows a decorator.
    if previous_logical.startswith('@'):
        if gap:
            return 0, "E304 blank lines found after function decorator"
        return None

    # More than two blank lines anywhere, or two inside an indented block.
    if gap > 2 or (indent_level and gap == 2):
        return 0, "E303 too many blank lines (%d)" % gap

    starts_definition = (logical_line.startswith('def ') or
                         logical_line.startswith('class ') or
                         logical_line.startswith('@'))
    if not starts_definition:
        return None

    if not indent_level:
        # Top-level definitions need exactly two blank lines.
        if gap != 2:
            return 0, "E302 expected 2 blank lines, found %d" % gap
        return None

    # Nested definitions need a blank line unless the previous logical line
    # is itself a class/def header or starts with a string literal.
    if not gap and not E301NOT_REGEX.match(previous_logical):
        return 0, "E301 expected 1 blank line, found 0"
    return None
259
260
def extraneous_whitespace(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately inside parentheses, brackets or braces.

    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    line = logical_line
    for char in '([{':
        found = line.find(char + ' ')
        if found > -1:
            # The offset points at the space after the opening bracket.
            return found + 1, "E201 whitespace after '%s'" % char
    for char in '}])':
        # A closing bracket written as ", )" is tolerated.  Every
        # occurrence is examined, so an allowed ", )" early in the line
        # does not hide a real violation further to the right.
        needle = ' ' + char
        found = line.find(needle)
        while found > -1:
            # At offset 0 there is no preceding character to inspect;
            # line[-1] would wrap around to the end of the line.
            if found == 0 or line[found - 1] != ',':
                return found, "E202 whitespace before '%s'" % char
            found = line.find(needle, found + 1)
    for char in ',;:':
        found = line.find(' ' + char)
        if found > -1:
            return found, "E203 whitespace before '%s'" % char
    return None
294
295
def missing_whitespace(logical_line):
    """
    JCR: Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    """
    line = logical_line
    # The last character has no successor, so it is never examined.
    for index, char in enumerate(line[:-1]):
        if char not in ',;:':
            continue
        following = line[index + 1]
        if following in WHITESPACE:
            continue
        if char == ':':
            prefix = line[:index]
            if prefix.count('[') > prefix.count(']'):
                continue  # Slice syntax, no space required
        if char == ',' and following == ')':
            continue  # Allow tuple with only one element: (3,)
        return index, "E231 missing whitespace after '%s'" % char
    return None
319
320
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    """
    # The multiple-of-four rule only applies to space-indented files.
    if indent_char == ' ' and indent_level % 4 != 0:
        return 0, "E111 indentation is not a multiple of four"
    opens_block = previous_logical.endswith(':')
    is_deeper = indent_level > previous_indent_level
    if opens_block and not is_deeper:
        return 0, "E112 expected an indented block"
    if is_deeper and not opens_block:
        return 0, "E113 unexpected indentation"
    return None
346
347
def whitespace_before_parameters(logical_line, tokens):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately before the open parenthesis that starts the argument
      list of a function call.

    - Immediately before the open parenthesis that starts an indexing or
      slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    previous = tokens[0]
    for position in range(1, len(tokens)):
        current = tokens[position]
        token_type, text, start, end, line = current
        prev_type, prev_text, prev_end = previous[0], previous[1], previous[3]
        previous = current
        # Only an opening parenthesis or bracket can be flagged.
        if token_type != tokenize.OP or text not in '([':
            continue
        # It must directly follow a name, with a gap in between.
        if start == prev_end or prev_type != tokenize.NAME:
            continue
        # "class Name (Base)" is not a call or subscript.
        if position >= 2 and tokens[position - 2][1] == 'class':
            continue
        # Keywords such as "if", "in" or "return" may precede a bracket.
        if iskeyword(prev_text):
            continue
        return prev_end, "E211 whitespace before '%s'" % text
    return None
380
381
def whitespace_around_operator(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    line = logical_line
    for operator in OPERATORS:
        # For each operator the four patterns are tried in this fixed
        # order; the first one that occurs in the line is reported.
        probes = (
            ('  ' + operator, "E221 multiple spaces before operator"),
            (operator + '  ', "E222 multiple spaces after operator"),
            ('\t' + operator, "E223 tab before operator"),
            (operator + '\t', "E224 tab after operator"),
        )
        for needle, text in probes:
            found = line.find(needle)
            if found > -1:
                return found, text
    return None
409
410
def missing_whitespace_around_operator(logical_line, tokens):
    r"""
    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - Use spaces around arithmetic operators.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: baz(**kwargs)
    Okay: negative = -1
    Okay: spam(-1)
    Okay: alpha[:-i]
    Okay: if not -5 < x < +5:\n    pass
    Okay: lambda *args, **kw: (args, kw)

    E225: i=i+1
    E225: submitted +=1
    E225: x = x*2 - 1
    E225: hypot2 = x*x + y*y
    E225: c = (a+b) * (a-b)
    E225: c = alpha -4
    E225: z = x **y
    """
    parens = 0
    # True while the token after an operator still has to be checked for
    # a separating space.
    need_space = False
    # Pretend an operator precedes the first token, so that a leading
    # "-", "+", "*" or "**" is treated as unary.
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
            # ERRORTOKEN is triggered by backticks in Python 3000
            continue
        # "lambda" is counted like an open parenthesis so that "=" in its
        # default arguments is treated like a keyword argument.
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            # This token follows an operator that requires spaces.
            if start == prev_end:
                return prev_end, "E225 missing whitespace around operator"
            need_space = False
        elif token_type == tokenize.OP:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in BINARY_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Tuple membership instead of a substring test: prev_text
                # is None for the first token, and "None in '}])'" raises
                # TypeError.
                after_closer = prev_text in ('}', ']', ')')
                after_keyword = (prev_type == tokenize.NAME and
                                 iskeyword(prev_text))
                if ((prev_type != tokenize.OP or after_closer) and
                        not after_keyword):
                    # The operator follows an operand, so it is binary.
                    # Otherwise:
                    # Allow unary operators: -123, -x, +1.
                    # Allow argument unpacking: foo(*args, **kwargs).
                    need_space = True
            if need_space and start == prev_end:
                return prev_end, "E225 missing whitespace around operator"
        prev_type = token_type
        prev_text = text
        prev_end = end
    return None
474
475
def whitespace_around_comma(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    JCR: This should also be applied around comma etc.
    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    line = logical_line
    # For each separator, the double-space form is tested before the tab.
    patterns = (
        ('  ', "E241 multiple spaces after '%s'"),
        ('\t', "E242 tab after '%s'"),
    )
    for separator in ',;:':
        for filler, template in patterns:
            found = line.find(separator + filler)
            if found > -1:
                # The offset points just past the separator.
                return found + 1, template % separator
    return None
498
499
def whitespace_around_named_parameter_equals(logical_line):
    """
    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    """
    issue = "E251 no spaces around keyword / parameter equals"
    comparisons = ('==', '!=', '<=', '>=')
    depth = 0
    # The two characters preceding the current one; together with the
    # current character they form a sliding three-character window.
    before, middle = ' ', ' '
    for pos, current in enumerate(logical_line):
        # Only look at '=' in the middle of the window while the nesting
        # counter is non-zero.
        if depth and middle == '=':
            # Whitespace before '=' that is not the start of '=='.
            if before in WHITESPACE and middle + current not in comparisons:
                return pos, issue
            # Whitespace after '=' that is not the end of a comparison.
            if current in WHITESPACE and before + middle not in comparisons:
                return pos, issue
        # The nesting counter is updated after the checks, so the
        # parenthesis itself is judged with the previous depth.
        if current == '(':
            depth += 1
        elif current == ')':
            depth -= 1
        before, middle = middle, current
    return None
534
535
def whitespace_before_inline_comment(logical_line, tokens):
    """
    Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.  Inline
    comments should be separated by at least two spaces from the statement.
    They should start with a # and a single space.

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if token_type != tokenize.COMMENT:
            # Remember where the last non-comment token ended.
            prev_end = end
            continue
        if not line[:start[1]].strip():
            continue  # Comment on a line of its own, not an inline comment
        on_same_row = prev_end[0] == start[0]
        if on_same_row and start[1] < prev_end[1] + 2:
            return prev_end, "E261 at least two spaces before inline comment"
        # Exactly one space must follow the '#'.
        well_formed = text.startswith('# ') and not text.startswith('#  ')
        if not well_formed:
            return start, "E262 inline comment should start with '# '"
    return None
565
566
def imports_on_separate_lines(logical_line):
    r"""
    Imports should usually be on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    # Only plain "import a, b" is flagged; "from x import a, b" is fine.
    if not logical_line.startswith('import '):
        return None
    comma = logical_line.find(',')
    if comma < 0:
        return None
    return comma, "E401 multiple imports on one line"
585
586
def compound_statements(logical_line):
    r"""
    Compound statements (multiple statements on the same line) are
    generally discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements. Also
    avoid folding such long lines!

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()

    E702: do_one(); do_two(); do_three()
    """
    line = logical_line
    colon = line.find(':')
    # A colon that ends the line just opens a block and is fine.
    if -1 < colon < len(line) - 1:
        head = line[:colon]
        inside_dict = head.count('{') > head.count('}')     # {'a': 1}
        inside_slice = head.count('[') > head.count(']')    # [1:2]
        if not (inside_dict or inside_slice or
                re.search(r'\blambda\b', head)):            # lambda x: x
            return colon, "E701 multiple statements on one line (colon)"
    semicolon = line.find(';')
    if semicolon > -1:
        return semicolon, "E702 multiple statements on one line (semicolon)"
    return None
623
624
def python_3000_has_key(logical_line):
    """
    The {}.has_key() method will be removed in the future version of
    Python. Use the 'in' operation instead, like:
    d = {"a": 1, "b": 2}
    if "b" in d:
        print d["b"]
    """
    marker = '.has_key('
    if marker not in logical_line:
        return None
    # Report the offset of the first occurrence.
    return logical_line.index(marker), "W601 .has_key() is deprecated, use 'in'"
636
637
def python_3000_raise_comma(logical_line):
    """
    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".

    The paren-using form is preferred because when the exception arguments
    are long or include string formatting, you don't need to use line
    continuation characters thanks to the containing parentheses.  The older
    form will be removed in Python 3000.
    """
    # match() anchors at the start of the logical line.
    found = RAISE_COMMA_REGEX.match(logical_line)
    if found is None:
        return None
    # Group 1 is the comma that follows the exception name.
    return found.start(1), "W602 deprecated form of raising exception"
651
652
def python_3000_not_equal(logical_line):
    """
    != can also be written <>, but this is an obsolete usage kept for
    backwards compatibility only. New code should always use !=.
    The older syntax is removed in Python 3000.
    """
    marker = '<>'
    if marker not in logical_line:
        return None
    return logical_line.index(marker), "W603 '<>' is deprecated, use '!='"
662
663
def python_3000_backticks(logical_line):
    """
    Backticks are removed in Python 3000.
    Use repr() instead.
    """
    marker = '`'
    if marker not in logical_line:
        return None
    return logical_line.index(marker), "W604 backticks are deprecated, use 'repr()'"
672
673
674##############################################################################
675# Helper functions
676##############################################################################
677
678
def expand_indent(line):
    """
    Return the width of the leading indentation of line.
    Each tab advances to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\\t')
    8
    >>> expand_indent('    \\t')
    8
    >>> expand_indent('       \\t')
    8
    >>> expand_indent('        \\t')
    16
    """
    width = 0
    for char in line:
        if char == ' ':
            width += 1
        elif char == '\t':
            # Jump to the next tab stop (a multiple of 8).
            width += 8 - width % 8
        else:
            # First non-indentation character ends the scan.
            break
    return width
704
705
def mute_string(text):
    """
    Replace the contents of a string literal with 'x' characters of the
    same length, so that checks do not match syntax inside strings.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    quote = text[-1:]
    prefix_length = 0   # length of string modifiers (e.g. u or r)
    quote_width = 1     # 1 for single quotes, 3 for triple quotes
    if quote in ('"', "'"):
        # Everything before the first quote character is the prefix.
        prefix_length = text.index(quote)
        if text.endswith(quote * 3):
            quote_width = 3
    body_start = prefix_length + quote_width
    body_end = len(text) - quote_width
    return text[:body_start] + 'x' * (body_end - body_start) + text[body_end:]
729
730
def message(text):
    """Write text to standard output, followed by a newline."""
    print(text)
736
737
738##############################################################################
739# Framework to run all checks
740##############################################################################
741
742
def find_checks(argument_name):
    """
    Find all globally visible functions where the first argument name
    starts with argument_name.

    Return a sorted list of (name, function, argument_names) tuples.  A
    function whose docstring mentions error codes is only included if at
    least one of those codes is not ignored; a function whose docstring
    mentions no code is always included.
    """
    # inspect.getargspec() was removed in Python 3.11.  Prefer
    # getfullargspec() where it exists and fall back on old interpreters.
    # Index 0 of either result is the list of positional argument names.
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    checks = []
    # Iterate over a snapshot of the module namespace.
    for name, function in list(globals().items()):
        if not inspect.isfunction(function):
            continue
        args = get_spec(function)[0]
        if args and args[0].startswith(argument_name):
            codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '')
            # [''] stands in for "no code mentioned" so the loop body
            # runs once and the check is kept.
            for code in codes or ['']:
                if not code or not ignore_code(code):
                    checks.append((name, function, args))
                    break
    checks.sort()
    return checks
761
762
763class Checker(object):
764    """
765    Load a Python source file, tokenize it, check coding style.
766    """
767
768    def __init__(self, filename):
769        if filename:
770            self.filename = filename
771            try:
772                self.lines = open(filename).readlines()
773            except UnicodeDecodeError:
774                # Errors may occur with non-UTF8 files in Python 3000
775                self.lines = open(filename, errors='replace').readlines()
776        else:
777            self.filename = 'stdin'
778            self.lines = []
779        options.counters['physical lines'] = \
780            options.counters.get('physical lines', 0) + len(self.lines)
781
782    def readline(self):
783        """
784        Get the next line from the input buffer.
785        """
786        self.line_number += 1
787        if self.line_number > len(self.lines):
788            return ''
789        return self.lines[self.line_number - 1]
790
791    def readline_check_physical(self):
792        """
793        Check and return the next physical line. This method can be
794        used to feed tokenize.generate_tokens.
795        """
796        line = self.readline()
797        if line:
798            self.check_physical(line)
799        return line
800
801    def run_check(self, check, argument_names):
802        """
803        Run a check plugin.
804        """
805        arguments = []
806        for name in argument_names:
807            arguments.append(getattr(self, name))
808        return check(*arguments)
809
810    def check_physical(self, line):
811        """
812        Run all physical checks on a raw input line.
813        """
814        self.physical_line = line
815        if self.indent_char is None and len(line) and line[0] in ' \t':
816            self.indent_char = line[0]
817        for name, check, argument_names in options.physical_checks:
818            result = self.run_check(check, argument_names)
819            if result is not None:
820                offset, text = result
821                self.report_error(self.line_number, offset, text, check)
822
823    def build_tokens_line(self):
824        """
825        Build a logical line from tokens.
826        """
827        self.mapping = []
828        logical = []
829        length = 0
830        previous = None
831        for token in self.tokens:
832            token_type, text = token[0:2]
833            if token_type in (tokenize.COMMENT, tokenize.NL,
834                              tokenize.INDENT, tokenize.DEDENT,
835                              tokenize.NEWLINE):
836                continue
837            if token_type == tokenize.STRING:
838                text = mute_string(text)
839            if previous:
840                end_line, end = previous[3]
841                start_line, start = token[2]
842                if end_line != start_line:  # different row
843                    if self.lines[end_line - 1][end - 1] not in '{[(':
844                        logical.append(' ')
845                        length += 1
846                elif end != start:  # different column
847                    fill = self.lines[end_line - 1][end:start]
848                    logical.append(fill)
849                    length += len(fill)
850            self.mapping.append((length, token))
851            logical.append(text)
852            length += len(text)
853            previous = token
854        self.logical_line = ''.join(logical)
855        assert self.logical_line.lstrip() == self.logical_line
856        assert self.logical_line.rstrip() == self.logical_line
857
858    def check_logical(self):
859        """
860        Build a line from tokens and run all logical checks on it.
861        """
862        options.counters['logical lines'] = \
863            options.counters.get('logical lines', 0) + 1
864        self.build_tokens_line()
865        first_line = self.lines[self.mapping[0][1][2][0] - 1]
866        indent = first_line[:self.mapping[0][1][2][1]]
867        self.previous_indent_level = self.indent_level
868        self.indent_level = expand_indent(indent)
869        if options.verbose >= 2:
870            print(self.logical_line[:80].rstrip())
871        for name, check, argument_names in options.logical_checks:
872            if options.verbose >= 3:
873                print('   ', name)
874            result = self.run_check(check, argument_names)
875            if result is not None:
876                offset, text = result
877                if isinstance(offset, tuple):
878                    original_number, original_offset = offset
879                else:
880                    for token_offset, token in self.mapping:
881                        if offset >= token_offset:
882                            original_number = token[2][0]
883                            original_offset = (token[2][1]
884                                               + offset - token_offset)
885                self.report_error(original_number, original_offset,
886                                  text, check)
887        self.previous_logical = self.logical_line
888
    def check_all(self):
        """
        Run all checks on the input file.

        Resets the per-file state, hands self.readline_check_physical to
        the tokenizer and calls check_logical() each time a NEWLINE token
        is seen while no bracket is open.

        Returns the value of self.file_errors.
        """
        self.file_errors = 0
        self.line_number = 0
        self.indent_char = None
        self.indent_level = 0
        self.previous_logical = ''
        self.blank_lines = 0
        self.blank_lines_before_comment = 0
        self.tokens = []
        # Number of brackets that are currently open.
        parens = 0
        for token in tokenize.generate_tokens(self.readline_check_physical):
            # print(tokenize.tok_name[token[0]], repr(token))
            self.tokens.append(token)
            token_type, text = token[0:2]
            if token_type == tokenize.OP and text in '([{':
                parens += 1
            if token_type == tokenize.OP and text in '}])':
                parens -= 1
            if token_type == tokenize.NEWLINE and not parens:
                # NEWLINE outside of brackets: check the collected tokens
                # as one logical line, then reset the per-line state.
                self.check_logical()
                self.blank_lines = 0
                self.blank_lines_before_comment = 0
                self.tokens = []
            if token_type == tokenize.NL and not parens:
                if len(self.tokens) <= 1:
                    # The physical line contains only this token.
                    self.blank_lines += 1
                self.tokens = []
            if token_type == tokenize.COMMENT:
                source_line = token[4]
                token_start = token[2][1]
                if source_line[:token_start].strip() == '':
                    # Only whitespace precedes the comment on its line:
                    # move the blank line count over to the comment counter.
                    self.blank_lines_before_comment = max(self.blank_lines,
                        self.blank_lines_before_comment)
                    self.blank_lines = 0
                if text.endswith('\n') and not parens:
                    # The comment also ends a physical line.  This works around
                    # Python < 2.6 behaviour, which does not generate NL after
                    # a comment which is on a line by itself.
                    self.tokens = []
        return self.file_errors
933
934    def report_error(self, line_number, offset, text, check):
935        """
936        Report an error, according to options.
937        """
938        if options.quiet == 1 and not self.file_errors:
939            message(self.filename)
940        self.file_errors += 1
941        code = text[:4]
942        options.counters[code] = options.counters.get(code, 0) + 1
943        options.messages[code] = text[5:]
944        if options.quiet:
945            return
946        if options.testsuite:
947            basename = os.path.basename(self.filename)
948            if basename[:4] != code:
949                return  # Don't care about other errors or warnings
950            if 'not' not in basename:
951                return  # Don't print the expected error message
952        if ignore_code(code):
953            return
954        if options.counters[code] == 1 or options.repeat:
955            message("%s:%s:%d: %s" %
956                    (self.filename, line_number, offset + 1, text))
957            if options.show_source:
958                line = self.lines[line_number - 1]
959                message(line.rstrip())
960                message(' ' * offset + '^')
961            if options.show_pep8:
962                message(check.__doc__.lstrip('\n').rstrip())
963
964
def input_file(filename):
    """
    Run all checks on a single Python source file.

    Returns an empty dict when filename is excluded and None otherwise.
    In testsuite mode the counters are reset for every file, and a
    message is printed if the error code named by the first four
    characters of the file name was not found (unless the file name
    contains 'not').
    """
    if excluded(filename):
        return {}
    if options.verbose:
        message('checking ' + filename)
    files_so_far = options.counters.get('files', 0)
    if options.testsuite:
        # Keep showing errors for multiple tests
        options.counters = {}
    options.counters['files'] = files_so_far + 1
    Checker(filename).check_all()
    if not options.testsuite:
        return
    # Check if the expected error was found
    basename = os.path.basename(filename)
    code = basename[:4]
    if 'not' not in basename and options.counters.get(code, 0) == 0:
        message("%s: error %s not found" % (filename, code))
984
985
def input_dir(dirname):
    """
    Check all Python source files in this directory and all subdirectories.

    Trailing slashes are stripped from dirname before it is tested
    against the exclude patterns; nothing is checked when dirname itself
    is excluded.  Subdirectories that match an exclude pattern are pruned
    from the walk, and only files accepted by filename_match() are passed
    to input_file().  Directories and files are visited in sorted order.
    """
    # Keep the name as it is if stripping would leave nothing (e.g. '/'),
    # otherwise os.walk would be handed an empty path.
    dirname = dirname.rstrip('/') or dirname
    if excluded(dirname):
        return
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] = \
            options.counters.get('directories', 0) + 1
        # Prune in place so that os.walk does not descend into excluded
        # subdirectories.  The list is rebuilt instead of calling remove()
        # while iterating over it, which skipped the entry following each
        # removed one.
        dirs[:] = [subdir for subdir in sorted(dirs) if not excluded(subdir)]
        files.sort()
        for filename in files:
            if filename_match(filename):
                input_file(os.path.join(root, filename))
1006
1007
def excluded(filename):
    """
    Check if options.exclude contains a pattern that matches filename.

    Only the last path component of filename is matched against the
    patterns.  Returns True if any pattern matches, otherwise False.
    """
    basename = os.path.basename(filename)
    return any(fnmatch(basename, pattern) for pattern in options.exclude)
1017
1018
def filename_match(filename):
    """
    Check if options.filename contains a pattern that matches filename.

    If options.filename is unspecified, this always returns True.
    Otherwise returns True if any pattern matches, and False if none does.
    """
    if not options.filename:
        return True
    return any(fnmatch(filename, pattern) for pattern in options.filename)
1029
1030
def ignore_code(code):
    """
    Check if options.ignore contains a prefix of the error code.

    If options.select contains a prefix of the error code, do not ignore it.
    Selection takes precedence over ignoring.  Returns True if the code
    is to be ignored, otherwise False.
    """
    if any(code.startswith(select) for select in options.select):
        return False
    return any(code.startswith(ignore) for ignore in options.ignore)
1042
1043
def get_error_statistics():
    """
    Get error statistics.

    Returns the result of get_statistics("E"), i.e. the statistics for
    the message codes that start with "E".
    """
    return get_statistics("E")
1047
1048
def get_warning_statistics():
    """
    Get warning statistics.

    Returns the result of get_statistics("W"), i.e. the statistics for
    the message codes that start with "W".
    """
    return get_statistics("W")
1052
1053
def get_statistics(prefix=''):
    """
    Build the statistics lines for message codes starting with prefix.

    prefix='' matches all errors and warnings
    prefix='E' matches all errors
    prefix='W' matches all warnings
    prefix='E4' matches all errors that have to do with imports

    Returns a list of strings, one per matching code in sorted order,
    each made of the count, the code and the stored message.
    """
    return ['%-7s %s %s' % (options.counters[code], code,
                            options.messages[code])
            for code in sorted(options.messages)
            if code.startswith(prefix)]
1071
1072
def get_count(prefix=''):
    """
    Return the total count of errors and warnings.

    Only the codes in options.messages that start with prefix are added
    up; the default prefix matches all of them.
    """
    return sum(options.counters[code]
               for code in options.messages
               if code.startswith(prefix))
1081
1082
def print_statistics(prefix=''):
    """
    Print overall statistics (number of errors and warnings).

    Prints every line returned by get_statistics(prefix).
    """
    for line in get_statistics(prefix):
        print(line)
1087
1088
def print_benchmark(elapsed):
    """
    Print benchmark numbers.

    elapsed is the measured run time in seconds.  The elapsed time is
    printed first, followed by one line for each of the 'directories',
    'files', 'logical lines' and 'physical lines' counters that is
    present in options.counters, showing its rate per second and total.

    When elapsed is zero (a run shorter than the timer resolution) no
    rate can be computed, so 'n/a' is printed in its place instead of
    raising ZeroDivisionError.
    """
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    keys = ['directories', 'files',
            'logical lines', 'physical lines']
    for key in keys:
        if key not in options.counters:
            continue
        total = options.counters[key]
        if elapsed:
            rate = '%-7d' % (total / elapsed)
        else:
            # Same column width as the numeric rate.
            rate = '%-7s' % 'n/a'
        print('%s %s per second (%d total)' % (rate, key, total))
1101
1102
def selftest():
    """
    Run the test cases found in the docstrings of all check functions.

    Every docstring line matched by SELFTEST_REGEX yields an expected
    code and a source snippet.  The snippet is split on a literal
    backslash-n, and the escapes backslash-t and backslash-s are turned
    into a tab and a space, before it is checked with a fresh Checker.
    A case with the code 'Okay' fails if options.counters ends up with
    more than one entry; any other case fails if its code was not
    counted.  Failures are printed, and a summary is printed when
    options.verbose is set.

    Note that options.quiet and options.counters are overwritten.
    """
    passed = 0
    failed = 0
    all_checks = options.physical_checks + options.logical_checks
    for name, check, argument_names in all_checks:
        for doc_line in check.__doc__.splitlines():
            match = SELFTEST_REGEX.match(doc_line.lstrip())
            if match is None:
                continue
            code, source = match.groups()
            checker = Checker(None)
            for part in source.split(r'\n'):
                part = part.replace(r'\t', '\t').replace(r'\s', ' ')
                checker.lines.append(part + '\n')
            # Silence the report and start from empty counters, so that
            # the counters reflect this test case only.
            options.quiet = 2
            options.counters = {}
            checker.check_all()
            error = None
            if code == 'Okay':
                if len(options.counters) > 1:
                    codes = [key for key in options.counters.keys()
                             if key != 'logical lines']
                    error = "incorrectly found %s" % ', '.join(codes)
            elif options.counters.get(code, 0) == 0:
                error = "failed to find %s" % code
            if not error:
                passed += 1
                continue
            failed += 1
            if len(checker.lines) == 1:
                print("pep8.py: %s: %s" %
                      (error, checker.lines[0].rstrip()))
            else:
                print("pep8.py: %s:" % error)
                for source_line in checker.lines:
                    print(source_line.rstrip())
    if options.verbose:
        print("%d passed and %d failed." % (passed, failed))
        if failed:
            print("Test failed.")
        else:
            print("Test passed.")
1150
1151
def process_options(arglist=None):
    """
    Process options passed either via arglist or via command line args.

    Stores the parsed options and the positional arguments in the module
    globals `options` and `args`.  The comma separated values of
    --exclude, --filename, --select and --ignore are split into lists,
    the physical and logical checks are looked up with find_checks(),
    and the counters and messages are reset to empty dicts.

    Returns the (options, args) pair.
    """
    global options, args
    parser = OptionParser(version=__version__,
                          usage="%prog [options] input ...")
    add_option = parser.add_option
    add_option('-v', '--verbose', default=0, action='count',
               help="print status messages, or debug with -vv")
    add_option('-q', '--quiet', default=0, action='count',
               help="report only file names, or nothing with -qq")
    add_option('-r', '--repeat', action='store_true',
               help="show all occurrences of the same error")
    add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
               help="exclude files or directories which match these "
                    "comma separated patterns (default: %s)" %
                    DEFAULT_EXCLUDE)
    add_option('--filename', metavar='patterns', default='*.py',
               help="when parsing directories, only check filenames "
                    "matching these comma separated patterns (default: "
                    "*.py)")
    add_option('--select', metavar='errors', default='',
               help="select errors and warnings (e.g. E,W6)")
    add_option('--ignore', metavar='errors', default='',
               help="skip errors and warnings (e.g. E4,W)")
    add_option('--show-source', action='store_true',
               help="show source code for each error")
    add_option('--show-pep8', action='store_true',
               help="show text of PEP 8 for each error")
    add_option('--statistics', action='store_true',
               help="count errors and warnings")
    add_option('--count', action='store_true',
               help="print total number of errors and warnings "
                    "to standard error and set exit code to 1 if "
                    "total is not null")
    add_option('--benchmark', action='store_true',
               help="measure processing speed")
    add_option('--testsuite', metavar='dir',
               help="run regression tests from dir")
    add_option('--doctest', action='store_true',
               help="run doctest on myself")
    options, args = parser.parse_args(arglist)
    if options.testsuite:
        # The test suite directory is checked like any other input.
        args.append(options.testsuite)
    if not args and not options.doctest:
        parser.error('input not specified')
    options.prog = os.path.basename(sys.argv[0])
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    if options.filename:
        options.filename = options.filename.split(',')
    if not options.select:
        options.select = []
    else:
        options.select = options.select.split(',')
    if options.ignore:
        options.ignore = options.ignore.split(',')
    elif options.select:
        # Ignore all checks which are not explicitly selected
        options.ignore = ['']
    elif options.testsuite or options.doctest:
        # For doctest and testsuite, all checks are required
        options.ignore = []
    else:
        # The default choice: ignore controversial checks
        options.ignore = DEFAULT_IGNORE
    options.physical_checks = find_checks('physical_line')
    options.logical_checks = find_checks('logical_line')
    options.counters = {}
    options.messages = {}
    return options, args
1224
1225
def _main():
    """
    Parse options and run checks on Python source.

    With --doctest the doctests and the selftest are run first.  Every
    argument is then checked as a directory or as a file, followed by
    the optional statistics and benchmark output.  With --count the
    total is written to standard error and the exit code is set to 1 if
    the total is not zero.
    """
    options, args = process_options()
    if options.doctest:
        import doctest
        doctest.testmod(verbose=options.verbose)
        selftest()
    started = time.time()
    for path in args:
        if not os.path.isdir(path):
            input_file(path)
        else:
            input_dir(path)
    elapsed = time.time() - started
    if options.statistics:
        print_statistics()
    if options.benchmark:
        print_benchmark(elapsed)
    if not options.count:
        return
    total = get_count()
    if total:
        sys.stderr.write(str(total) + '\n')
        sys.exit(1)
1251
1252
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    _main()
1255