• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/local/bin/python
2
3# NOTE: the above "/usr/local/bin/python" is NOT a mistake.  It is
4# intentionally NOT "/usr/bin/env python".  On many systems
5# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6# scripts, and /usr/local/bin is the default directory where Python is
7# installed, so /usr/bin/env would be unable to find python.  Granted,
8# binary installations by Linux vendors often install Python in
9# /usr/bin.  So let those vendors patch cgi.py to match their choice
10# of installation.
11
12"""Support module for CGI (Common Gateway Interface) scripts.
13
14This module defines a number of utilities for use by CGI scripts
15written in Python.
16"""
17
18# History
19# -------
20#
21# Michael McLay started this module.  Steve Majewski changed the
22# interface to SvFormContentDict and FormContentDict.  The multipart
23# parsing was inspired by code submitted by Andreas Paepcke.  Guido van
24# Rossum rewrote, reformatted and documented the module and is currently
25# responsible for its maintenance.
26#
27
28__version__ = "2.6"
29
30
31# Imports
32# =======
33
34from io import StringIO, BytesIO, TextIOWrapper
35from collections import Mapping
36import sys
37import os
38import urllib.parse
39from email.parser import FeedParser
40from email.message import Message
41from warnings import warn
42import html
43import locale
44import tempfile
45
46__all__ = ["MiniFieldStorage", "FieldStorage",
47           "parse", "parse_qs", "parse_qsl", "parse_multipart",
48           "parse_header", "test", "print_exception", "print_environ",
49           "print_form", "print_directory", "print_arguments",
50           "print_environ_usage", "escape"]
51
52# Logging support
53# ===============
54
55logfile = ""            # Filename to log to, if not empty
56logfp = None            # File object to log to, if not None
57
def initlog(*allargs):
    """Set up logging on first use, then write the given log message.

    Callers should always go through log(), never call this directly:
    log is a module variable bound to initlog (initially), to dolog
    (once a log file is available) or to nolog (logging disabled).

    The first argument is a format string and any further arguments are
    fed to the % operator, so
        log("%s: %s", "a", "b")
    writes "a: b" plus a newline to the log file.

    If the global logfp is not None it must be a file object; log data
    is written to it.

    If the global logfp is None, the global logfile may name a file,
    which is opened in append mode.  This file should be world
    writable!!!  If it cannot be opened, logging is silently disabled
    (there is no safe place to report the failure).

    """
    global log, logfile, logfp
    must_open = logfile and not logfp
    if must_open:
        try:
            logfp = open(logfile, "a")
        except OSError:
            # Deliberate best effort: fall through with logging disabled.
            pass
    # Rebind log so later calls skip this set-up step entirely.
    log = dolog if logfp else nolog
    log(*allargs)
92
def dolog(fmt, *args):
    """Format fmt with args and write it, newline-terminated, to logfp.

    See initlog() for the full description of the logging protocol.
    """
    message = fmt % args
    logfp.write(message + "\n")
96
def nolog(*allargs):
    """Do nothing; log is bound to this while logging is disabled."""
    return None
100
def closelog():
    """Close the log file and return logging to its initial state.

    Clears logfile, closes and forgets logfp if one is open, and rebinds
    log to initlog.
    """
    global log, logfile, logfp
    logfile = ''
    open_stream = logfp
    if open_stream:
        open_stream.close()
        logfp = None
    log = initlog
109
110log = initlog           # The current logging function
111
112
113# Parsing functions
114# =================
115
116# Maximum input we will accept when REQUEST_METHOD is POST
117# 0 ==> unlimited input
118maxlen = 0
119
def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
    """Parse a query in the environment or from a file (default stdin)

        Arguments, all optional:

        fp              : file pointer; default: sys.stdin (its binary
            buffer is used when fp is a TextIOWrapper)

        environ         : environment dictionary; default: os.environ.
            Note that REQUEST_METHOD and QUERY_STRING may be written
            back into this dictionary.

        keep_blank_values: flag indicating whether blank values in
            percent-encoded forms should be treated as blank strings.
            A true value keeps blanks as empty strings.  The default
            false value drops them as if they had not been sent.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        Returns a dictionary mapping field names to lists of values.
        Raises ValueError when a POST body is longer than maxlen.
    """
    if fp is None:
        fp = sys.stdin

    # Keys and values (except files) are returned as str, so a codec is
    # needed for the bytes read from fp.
    encoding = getattr(fp, 'encoding', 'latin-1')

    # fp.read() must return bytes
    if isinstance(fp, TextIOWrapper):
        fp = fp.buffer

    if 'REQUEST_METHOD' not in environ:
        environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone

    if environ['REQUEST_METHOD'] == 'POST':
        ctype, pdict = parse_header(environ['CONTENT_TYPE'])
        if ctype == 'multipart/form-data':
            return parse_multipart(fp, pdict)
        if ctype == 'application/x-www-form-urlencoded':
            clength = int(environ['CONTENT_LENGTH'])
            if maxlen and clength > maxlen:
                raise ValueError('Maximum content length exceeded')
            qs = fp.read(clength).decode(encoding)
        else:
            qs = ''                     # Unknown content-type

        # Append the URL query string (or argv[1] as a fallback) to the body.
        if 'QUERY_STRING' in environ:
            extras = [environ['QUERY_STRING']]
        elif sys.argv[1:]:
            extras = [sys.argv[1]]
        else:
            extras = []
        for extra in extras:
            qs = (qs + '&' if qs else qs) + extra
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    elif 'QUERY_STRING' in environ:
        qs = environ['QUERY_STRING']
    else:
        qs = sys.argv[1] if sys.argv[1:] else ""
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really

    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding)
184
185
186# parse query string function called from urlparse,
187# this is done in order to maintain backward compatibility.
188
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Deprecated: emits a DeprecationWarning and forwards to
    urllib.parse.parse_qs().
    """
    warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
         category=DeprecationWarning, stacklevel=2)
    parsed = urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
    return parsed
194
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Deprecated: emits a DeprecationWarning and forwards to
    urllib.parse.parse_qsl().
    """
    warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
         category=DeprecationWarning, stacklevel=2)
    pairs = urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
    return pairs
200
def parse_multipart(fp, pdict):
    """Parse multipart input.

    Arguments:
    fp   : input file; read() and readline() must return bytes
    pdict: dictionary containing other parameters of content-type header.
           The 'boundary' entry may be bytes or an ASCII str; a str (which
           is what parse_header() produces) is encoded to bytes here so it
           can be compared with the byte stream.

    Returns a dictionary just like parse_qs(): keys are the field names, each
    value is a list of values for that field (as read from fp).  This is
    easy to use but not much good if you are expecting megabytes to be
    uploaded -- in that case, use the FieldStorage class instead which is
    much more flexible.  Note that content-type is the raw, unparsed
    contents of the content-type header.

    Raises ValueError if the boundary is missing or invalid, or if a part
    declares a Content-Length larger than maxlen.

    XXX This does not parse nested multipart parts -- use FieldStorage for
    that.

    XXX This should really be subsumed by FieldStorage altogether -- no
    point in having two implementations of the same parsing algorithm.
    Also, FieldStorage protects itself better against certain DoS attacks
    by limiting the size of the data read in one chunk.  The API here
    does not support that kind of protection.  This also affects parse()
    since it can call parse_multipart().

    """
    import http.client

    boundary = b""
    if 'boundary' in pdict:
        boundary = pdict['boundary']
    if isinstance(boundary, str):
        # parse() hands us parse_header() output, whose values are str;
        # without this conversion b"--" + boundary raises TypeError.
        try:
            boundary = boundary.encode('ascii')
        except UnicodeEncodeError:
            raise ValueError('Invalid boundary in multipart form: %r'
                             % (boundary,)) from None
    if not valid_boundary(boundary):
        raise ValueError('Invalid boundary in multipart form: %r'
                            % (boundary,))

    nextpart = b"--" + boundary
    lastpart = b"--" + boundary + b"--"
    partdict = {}
    terminator = b""

    while terminator != lastpart:
        nbytes = -1         # declared Content-Length of this part, if any
        data = None         # stays None for the preamble before the first part
        if terminator:
            # At start of next part.  Read headers first.
            headers = http.client.parse_headers(fp)
            clength = headers.get('content-length')
            if clength:
                try:
                    nbytes = int(clength)
                except ValueError:
                    pass
            if nbytes > 0:
                if maxlen and nbytes > maxlen:
                    raise ValueError('Maximum content length exceeded')
                data = fp.read(nbytes)
            else:
                data = b""
        # Read lines until end of part.
        lines = []
        while True:
            line = fp.readline()
            if not line:
                terminator = lastpart # End outer loop
                break
            if line.startswith(b"--"):
                terminator = line.rstrip()
                if terminator in (nextpart, lastpart):
                    break
            lines.append(line)
        # Done with part.
        if data is None:
            continue
        if nbytes < 0 and lines:
            # No usable Content-Length: the collected lines are the value.
            # Strip final line terminator
            last = lines[-1]
            if last[-2:] == b"\r\n":
                last = last[:-2]
            elif last[-1:] == b"\n":
                last = last[:-1]
            lines[-1] = last
            data = b"".join(lines)
        line = headers['content-disposition']
        if not line:
            continue
        key, params = parse_header(line)
        if key != 'form-data':
            continue
        if 'name' not in params:
            continue
        partdict.setdefault(params['name'], []).append(data)

    return partdict
299
300
301def _parseparam(s):
302    while s[:1] == ';':
303        s = s[1:]
304        end = s.find(';')
305        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
306            end = s.find(';', end + 1)
307        if end < 0:
308            end = len(s)
309        f = s[:end]
310        yield f.strip()
311        s = s[end:]
312
def parse_header(line):
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.  Option
    names are lower-cased; a value wrapped in double quotes is unquoted
    and has its backslash escapes for '\\' and '"' resolved.  Fields
    without an '=' are ignored.

    """
    fields = _parseparam(';' + line)
    key = next(fields)
    options = {}
    for field in fields:
        name, sep, value = field.partition('=')
        if not sep:
            continue
        name = name.strip().lower()
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        options[name] = value
    return key, options
332
333
334# Classes for field storage
335# =========================
336
class MiniFieldStorage:

    """Like FieldStorage, for use when no file uploads are possible.

    Holds just a field name and its value; the remaining attributes are
    class-level placeholders.
    """

    # Dummy attributes
    filename = list = type = file = disposition = None
    type_options = {}
    disposition_options = {}
    headers = {}

    def __init__(self, name, value):
        """Constructor from field name and value."""
        self.name, self.value = name, value

    def __repr__(self):
        """Return printable representation."""
        shown = (self.name, self.value)
        return "MiniFieldStorage(%r, %r)" % shown
360
361
362class FieldStorage:
363
364    """Store a sequence of fields, reading multipart/form-data.
365
366    This class provides naming, typing, files stored on disk, and
367    more.  At the top level, it is accessible like a dictionary, whose
368    keys are the field names.  (Note: None can occur as a field name.)
369    The items are either a Python list (if there's multiple values) or
370    another FieldStorage or MiniFieldStorage object.  If it's a single
371    object, it has the following attributes:
372
373    name: the field name, if specified; otherwise None
374
375    filename: the filename, if specified; otherwise None; this is the
376        client side filename, *not* the file name on which it is
377        stored (that's a temporary file you don't deal with)
378
379    value: the value as a *string*; for file uploads, this
380        transparently reads the file every time you request the value
381        and returns *bytes*
382
383    file: the file(-like) object from which you can read the data *as
384        bytes* ; None if the data is stored a simple string
385
386    type: the content-type, or None if not specified
387
388    type_options: dictionary of options specified on the content-type
389        line
390
391    disposition: content-disposition, or None if not specified
392
393    disposition_options: dictionary of corresponding options
394
395    headers: a dictionary(-like) object (sometimes email.message.Message or a
396        subclass thereof) containing *all* headers
397
398    The class is subclassable, mostly for the purpose of overriding
399    the make_file() method, which is called internally to come up with
400    a file open for reading and writing.  This makes it possible to
401    override the default choice of storing all files in a temporary
402    directory and unlinking them as soon as they have been opened.
403
404    """
405    def __init__(self, fp=None, headers=None, outerboundary=b'',
406                 environ=os.environ, keep_blank_values=0, strict_parsing=0,
407                 limit=None, encoding='utf-8', errors='replace'):
408        """Constructor.  Read multipart/* until last part.
409
410        Arguments, all optional:
411
412        fp              : file pointer; default: sys.stdin.buffer
413            (not used when the request method is GET)
414            Can be :
415            1. a TextIOWrapper object
416            2. an object whose read() and readline() methods return bytes
417
418        headers         : header dictionary-like object; default:
419            taken from environ as per CGI spec
420
421        outerboundary   : terminating multipart boundary
422            (for internal use only)
423
424        environ         : environment dictionary; default: os.environ
425
426        keep_blank_values: flag indicating whether blank values in
427            percent-encoded forms should be treated as blank strings.
428            A true value indicates that blanks should be retained as
429            blank strings.  The default false value indicates that
430            blank values are to be ignored and treated as if they were
431            not included.
432
433        strict_parsing: flag indicating what to do with parsing errors.
434            If false (the default), errors are silently ignored.
435            If true, errors raise a ValueError exception.
436
437        limit : used internally to read parts of multipart/form-data forms,
438            to exit from the reading loop when reached. It is the difference
439            between the form content-length and the number of bytes already
440            read
441
442        encoding, errors : the encoding and error handler used to decode the
443            binary stream to strings. Must be the same as the charset defined
444            for the page sending the form (content-type : meta http-equiv or
445            header)
446
447        """
448        method = 'GET'
449        self.keep_blank_values = keep_blank_values
450        self.strict_parsing = strict_parsing
451        if 'REQUEST_METHOD' in environ:
452            method = environ['REQUEST_METHOD'].upper()
453        self.qs_on_post = None
454        if method == 'GET' or method == 'HEAD':
455            if 'QUERY_STRING' in environ:
456                qs = environ['QUERY_STRING']
457            elif sys.argv[1:]:
458                qs = sys.argv[1]
459            else:
460                qs = ""
461            qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape')
462            fp = BytesIO(qs)
463            if headers is None:
464                headers = {'content-type':
465                           "application/x-www-form-urlencoded"}
466        if headers is None:
467            headers = {}
468            if method == 'POST':
469                # Set default content-type for POST to what's traditional
470                headers['content-type'] = "application/x-www-form-urlencoded"
471            if 'CONTENT_TYPE' in environ:
472                headers['content-type'] = environ['CONTENT_TYPE']
473            if 'QUERY_STRING' in environ:
474                self.qs_on_post = environ['QUERY_STRING']
475            if 'CONTENT_LENGTH' in environ:
476                headers['content-length'] = environ['CONTENT_LENGTH']
477        else:
478            if not (isinstance(headers, (Mapping, Message))):
479                raise TypeError("headers must be mapping or an instance of "
480                                "email.message.Message")
481        self.headers = headers
482        if fp is None:
483            self.fp = sys.stdin.buffer
484        # self.fp.read() must return bytes
485        elif isinstance(fp, TextIOWrapper):
486            self.fp = fp.buffer
487        else:
488            if not (hasattr(fp, 'read') and hasattr(fp, 'readline')):
489                raise TypeError("fp must be file pointer")
490            self.fp = fp
491
492        self.encoding = encoding
493        self.errors = errors
494
495        if not isinstance(outerboundary, bytes):
496            raise TypeError('outerboundary must be bytes, not %s'
497                            % type(outerboundary).__name__)
498        self.outerboundary = outerboundary
499
500        self.bytes_read = 0
501        self.limit = limit
502
503        # Process content-disposition header
504        cdisp, pdict = "", {}
505        if 'content-disposition' in self.headers:
506            cdisp, pdict = parse_header(self.headers['content-disposition'])
507        self.disposition = cdisp
508        self.disposition_options = pdict
509        self.name = None
510        if 'name' in pdict:
511            self.name = pdict['name']
512        self.filename = None
513        if 'filename' in pdict:
514            self.filename = pdict['filename']
515        self._binary_file = self.filename is not None
516
517        # Process content-type header
518        #
519        # Honor any existing content-type header.  But if there is no
520        # content-type header, use some sensible defaults.  Assume
521        # outerboundary is "" at the outer level, but something non-false
522        # inside a multi-part.  The default for an inner part is text/plain,
523        # but for an outer part it should be urlencoded.  This should catch
524        # bogus clients which erroneously forget to include a content-type
525        # header.
526        #
527        # See below for what we do if there does exist a content-type header,
528        # but it happens to be something we don't understand.
529        if 'content-type' in self.headers:
530            ctype, pdict = parse_header(self.headers['content-type'])
531        elif self.outerboundary or method != 'POST':
532            ctype, pdict = "text/plain", {}
533        else:
534            ctype, pdict = 'application/x-www-form-urlencoded', {}
535        self.type = ctype
536        self.type_options = pdict
537        if 'boundary' in pdict:
538            self.innerboundary = pdict['boundary'].encode(self.encoding)
539        else:
540            self.innerboundary = b""
541
542        clen = -1
543        if 'content-length' in self.headers:
544            try:
545                clen = int(self.headers['content-length'])
546            except ValueError:
547                pass
548            if maxlen and clen > maxlen:
549                raise ValueError('Maximum content length exceeded')
550        self.length = clen
551        if self.limit is None and clen:
552            self.limit = clen
553
554        self.list = self.file = None
555        self.done = 0
556        if ctype == 'application/x-www-form-urlencoded':
557            self.read_urlencoded()
558        elif ctype[:10] == 'multipart/':
559            self.read_multi(environ, keep_blank_values, strict_parsing)
560        else:
561            self.read_single()
562
563    def __del__(self):
564        try:
565            self.file.close()
566        except AttributeError:
567            pass
568
569    def __enter__(self):
570        return self
571
572    def __exit__(self, *args):
573        self.file.close()
574
575    def __repr__(self):
576        """Return a printable representation."""
577        return "FieldStorage(%r, %r, %r)" % (
578                self.name, self.filename, self.value)
579
580    def __iter__(self):
581        return iter(self.keys())
582
583    def __getattr__(self, name):
584        if name != 'value':
585            raise AttributeError(name)
586        if self.file:
587            self.file.seek(0)
588            value = self.file.read()
589            self.file.seek(0)
590        elif self.list is not None:
591            value = self.list
592        else:
593            value = None
594        return value
595
596    def __getitem__(self, key):
597        """Dictionary style indexing."""
598        if self.list is None:
599            raise TypeError("not indexable")
600        found = []
601        for item in self.list:
602            if item.name == key: found.append(item)
603        if not found:
604            raise KeyError(key)
605        if len(found) == 1:
606            return found[0]
607        else:
608            return found
609
610    def getvalue(self, key, default=None):
611        """Dictionary style get() method, including 'value' lookup."""
612        if key in self:
613            value = self[key]
614            if isinstance(value, list):
615                return [x.value for x in value]
616            else:
617                return value.value
618        else:
619            return default
620
621    def getfirst(self, key, default=None):
622        """ Return the first value received."""
623        if key in self:
624            value = self[key]
625            if isinstance(value, list):
626                return value[0].value
627            else:
628                return value.value
629        else:
630            return default
631
632    def getlist(self, key):
633        """ Return list of received values."""
634        if key in self:
635            value = self[key]
636            if isinstance(value, list):
637                return [x.value for x in value]
638            else:
639                return [value.value]
640        else:
641            return []
642
643    def keys(self):
644        """Dictionary style keys() method."""
645        if self.list is None:
646            raise TypeError("not indexable")
647        return list(set(item.name for item in self.list))
648
649    def __contains__(self, key):
650        """Dictionary style __contains__ method."""
651        if self.list is None:
652            raise TypeError("not indexable")
653        return any(item.name == key for item in self.list)
654
655    def __len__(self):
656        """Dictionary style len(x) support."""
657        return len(self.keys())
658
659    def __bool__(self):
660        if self.list is None:
661            raise TypeError("Cannot be converted to bool.")
662        return bool(self.list)
663
664    def read_urlencoded(self):
665        """Internal: read data in query string format."""
666        qs = self.fp.read(self.length)
667        if not isinstance(qs, bytes):
668            raise ValueError("%s should return bytes, got %s" \
669                             % (self.fp, type(qs).__name__))
670        qs = qs.decode(self.encoding, self.errors)
671        if self.qs_on_post:
672            qs += '&' + self.qs_on_post
673        self.list = []
674        query = urllib.parse.parse_qsl(
675            qs, self.keep_blank_values, self.strict_parsing,
676            encoding=self.encoding, errors=self.errors)
677        for key, value in query:
678            self.list.append(MiniFieldStorage(key, value))
679        self.skip_lines()
680
681    FieldStorageClass = None
682
683    def read_multi(self, environ, keep_blank_values, strict_parsing):
684        """Internal: read a part that is itself multipart."""
685        ib = self.innerboundary
686        if not valid_boundary(ib):
687            raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
688        self.list = []
689        if self.qs_on_post:
690            query = urllib.parse.parse_qsl(
691                self.qs_on_post, self.keep_blank_values, self.strict_parsing,
692                encoding=self.encoding, errors=self.errors)
693            for key, value in query:
694                self.list.append(MiniFieldStorage(key, value))
695
696        klass = self.FieldStorageClass or self.__class__
697        first_line = self.fp.readline() # bytes
698        if not isinstance(first_line, bytes):
699            raise ValueError("%s should return bytes, got %s" \
700                             % (self.fp, type(first_line).__name__))
701        self.bytes_read += len(first_line)
702
703        # Ensure that we consume the file until we've hit our inner boundary
704        while (first_line.strip() != (b"--" + self.innerboundary) and
705                first_line):
706            first_line = self.fp.readline()
707            self.bytes_read += len(first_line)
708
709        while True:
710            parser = FeedParser()
711            hdr_text = b""
712            while True:
713                data = self.fp.readline()
714                hdr_text += data
715                if not data.strip():
716                    break
717            if not hdr_text:
718                break
719            # parser takes strings, not bytes
720            self.bytes_read += len(hdr_text)
721            parser.feed(hdr_text.decode(self.encoding, self.errors))
722            headers = parser.close()
723
724            # Some clients add Content-Length for part headers, ignore them
725            if 'content-length' in headers:
726                del headers['content-length']
727
728            part = klass(self.fp, headers, ib, environ, keep_blank_values,
729                         strict_parsing,self.limit-self.bytes_read,
730                         self.encoding, self.errors)
731            self.bytes_read += part.bytes_read
732            self.list.append(part)
733            if part.done or self.bytes_read >= self.length > 0:
734                break
735        self.skip_lines()
736
737    def read_single(self):
738        """Internal: read an atomic part."""
739        if self.length >= 0:
740            self.read_binary()
741            self.skip_lines()
742        else:
743            self.read_lines()
744        self.file.seek(0)
745
746    bufsize = 8*1024            # I/O buffering size for copy to file
747
748    def read_binary(self):
749        """Internal: read binary data."""
750        self.file = self.make_file()
751        todo = self.length
752        if todo >= 0:
753            while todo > 0:
754                data = self.fp.read(min(todo, self.bufsize)) # bytes
755                if not isinstance(data, bytes):
756                    raise ValueError("%s should return bytes, got %s"
757                                     % (self.fp, type(data).__name__))
758                self.bytes_read += len(data)
759                if not data:
760                    self.done = -1
761                    break
762                self.file.write(data)
763                todo = todo - len(data)
764
765    def read_lines(self):
766        """Internal: read lines until EOF or outerboundary."""
767        if self._binary_file:
768            self.file = self.__file = BytesIO() # store data as bytes for files
769        else:
770            self.file = self.__file = StringIO() # as strings for other fields
771        if self.outerboundary:
772            self.read_lines_to_outerboundary()
773        else:
774            self.read_lines_to_eof()
775
776    def __write(self, line):
777        """line is always bytes, not string"""
778        if self.__file is not None:
779            if self.__file.tell() + len(line) > 1000:
780                self.file = self.make_file()
781                data = self.__file.getvalue()
782                self.file.write(data)
783                self.__file = None
784        if self._binary_file:
785            # keep bytes
786            self.file.write(line)
787        else:
788            # decode to string
789            self.file.write(line.decode(self.encoding, self.errors))
790
791    def read_lines_to_eof(self):
792        """Internal: read lines until EOF."""
793        while 1:
794            line = self.fp.readline(1<<16) # bytes
795            self.bytes_read += len(line)
796            if not line:
797                self.done = -1
798                break
799            self.__write(line)
800
801    def read_lines_to_outerboundary(self):
802        """Internal: read lines until outerboundary.
803        Data is read as bytes: boundaries and line ends must be converted
804        to bytes for comparisons.
805        """
806        next_boundary = b"--" + self.outerboundary
807        last_boundary = next_boundary + b"--"
808        delim = b""
809        last_line_lfend = True
810        _read = 0
811        while 1:
812            if _read >= self.limit:
813                break
814            line = self.fp.readline(1<<16) # bytes
815            self.bytes_read += len(line)
816            _read += len(line)
817            if not line:
818                self.done = -1
819                break
820            if delim == b"\r":
821                line = delim + line
822                delim = b""
823            if line.startswith(b"--") and last_line_lfend:
824                strippedline = line.rstrip()
825                if strippedline == next_boundary:
826                    break
827                if strippedline == last_boundary:
828                    self.done = 1
829                    break
830            odelim = delim
831            if line.endswith(b"\r\n"):
832                delim = b"\r\n"
833                line = line[:-2]
834                last_line_lfend = True
835            elif line.endswith(b"\n"):
836                delim = b"\n"
837                line = line[:-1]
838                last_line_lfend = True
839            elif line.endswith(b"\r"):
840                # We may interrupt \r\n sequences if they span the 2**16
841                # byte boundary
842                delim = b"\r"
843                line = line[:-1]
844                last_line_lfend = False
845            else:
846                delim = b""
847                last_line_lfend = False
848            self.__write(odelim + line)
849
850    def skip_lines(self):
851        """Internal: skip lines until outer boundary if defined."""
852        if not self.outerboundary or self.done:
853            return
854        next_boundary = b"--" + self.outerboundary
855        last_boundary = next_boundary + b"--"
856        last_line_lfend = True
857        while True:
858            line = self.fp.readline(1<<16)
859            self.bytes_read += len(line)
860            if not line:
861                self.done = -1
862                break
863            if line.endswith(b"--") and last_line_lfend:
864                strippedline = line.strip()
865                if strippedline == next_boundary:
866                    break
867                if strippedline == last_boundary:
868                    self.done = 1
869                    break
870            last_line_lfend = line.endswith(b'\n')
871
872    def make_file(self):
873        """Overridable: return a readable & writable file.
874
875        The file will be used as follows:
876        - data is written to it
877        - seek(0)
878        - data is read from it
879
880        The file is opened in binary mode for files, in text mode
881        for other fields
882
883        This version opens a temporary file for reading and writing,
884        and immediately deletes (unlinks) it.  The trick (on Unix!) is
885        that the file can still be used, but it can't be opened by
886        another process, and it will automatically be deleted when it
887        is closed or when the current process terminates.
888
889        If you want a more permanent file, you derive a class which
890        overrides this method.  If you want a visible temporary file
891        that is nevertheless automatically deleted when the script
892        terminates, try defining a __del__ method in a derived class
893        which unlinks the temporary files you have created.
894
895        """
896        if self._binary_file:
897            return tempfile.TemporaryFile("wb+")
898        else:
899            return tempfile.TemporaryFile("w+",
900                encoding=self.encoding, newline = '\n')
901
902
903# Test/debug code
904# ===============
905
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Write minimal HTTP headers and dump all information provided to
    the script in HTML form.

    The dump is produced twice.  The first pass prints the working
    directory, command line arguments, form contents, environment and
    the list of CGI variables, then provokes an exception on purpose so
    that print_exception() is exercised.  The second pass repeats the
    dump (without the variable list) after setting the module-level
    ``maxlen`` to 50.  Exceptions raised in either pass are reported
    with print_exception() rather than propagated.

    *environ* is the mapping handed to print_environ(); it defaults to
    os.environ.

    Side effects: sys.stderr is rebound to sys.stdout and the global
    ``maxlen`` is left at 50; neither is restored.

    """
    print("Content-type: text/html")
    print()
    # Route error output into the response as well.
    sys.stderr = sys.stdout
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()
        # f() and g() exist only to put two extra frames on the traceback
        # that print_exception() will render below.
        def f():
            # The string is not valid Python source, so exec() raises; the
            # embedded markup shows whether the report escapes HTML.
            exec("testing print_exception() -- <I>italics?</I>")
        def g(f=f):
            f()
        print("<H3>What follows is a test, not an actual exception:</H3>")
        g()
    except:
        # Deliberately catches everything: this is a diagnostic script and
        # the failure itself is the information to show.
        print_exception()

    print("<H1>Second try with a small maxlen...</H1>")

    # NOTE(review): maxlen is defined outside this section; presumably it
    # caps the amount of form data accepted -- confirm against its users.
    global maxlen
    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        print_exception()
944
def print_exception(type=None, value=None, tb=None, limit=None):
    """Print an exception report as HTML on standard output.

    When *type* is None, the exception currently being handled (as
    returned by sys.exc_info()) is reported and *value* and *tb* are
    ignored.  *limit* is passed on to traceback.format_tb().

    The report is a heading followed by a <PRE> block; the last formatted
    line is wrapped in <B>, and all report text is HTML-escaped.
    """
    if type is None:
        type, value, tb = sys.exc_info()
    import traceback
    print()
    print("<H3>Traceback (most recent call last):</H3>")
    report = traceback.format_tb(tb, limit)
    report += traceback.format_exception_only(type, value)
    frames_text = html.escape("".join(report[:-1]))
    summary_text = html.escape(report[-1])
    print("<PRE>%s<B>%s</B></PRE>" % (frames_text, summary_text))
    # Drop the traceback reference held by this frame.
    del tb
958
def print_environ(environ=os.environ):
    """Dump the shell environment as HTML.

    Writes a heading and a <DL> list to standard output with one
    <DT>/<DD> pair per entry of *environ* (default: os.environ), sorted
    by name.  Names and values are HTML-escaped.
    """
    names = sorted(environ.keys())
    print("", "<H3>Shell Environment:</H3>", "<DL>", sep="\n")
    for name in names:
        term = html.escape(name)
        definition = html.escape(environ[name])
        print("<DT>", term, "<DD>", definition)
    print("</DL>", end="\n\n")
969
def print_form(form):
    """Dump the contents of a form as HTML.

    *form* needs a keys() method and item access by key.  For every key,
    in sorted order, the key, the type of its value and the repr of its
    value are written to standard output inside a <DL> list, all
    HTML-escaped.  A "No form fields." paragraph precedes the (empty)
    list when there are no keys.
    """
    names = sorted(form.keys())
    print("", "<H3>Form Contents:</H3>", sep="\n")
    if not names:
        print("<P>No form fields.")
    print("<DL>")
    for name in names:
        print("<DT>%s:" % html.escape(name), end=' ')
        field = form[name]
        type_text = html.escape(repr(type(field)))
        print("<i>%s</i>" % type_text)
        print("<DD>%s" % html.escape(repr(field)))
    print("</DL>", end="\n\n")
985
def print_directory():
    """Dump the current directory as HTML.

    Writes a heading followed by the HTML-escaped result of os.getcwd()
    to standard output.  If os.getcwd() raises OSError, the escaped error
    message is printed after an "OSError:" label instead.
    """
    print("", "<H3>Current Working Directory:</H3>", sep="\n")
    try:
        shown = html.escape(os.getcwd())
    except OSError as err:
        shown = "OSError: " + html.escape(str(err))
    print(shown, end="\n\n")
997
def print_arguments():
    """Dump the command line arguments (sys.argv) as HTML.

    Writes a heading followed by the list representation of sys.argv to
    standard output.  The text is HTML-escaped, like the output of the
    other dump functions in this module, because a CGI script can
    receive request-derived data on its command line.
    """
    print()
    print("<H3>Command Line Arguments:</H3>")
    print()
    # quote=False: the text lands in element content, where quotes are
    # harmless; leaving them alone keeps the output for ordinary arguments
    # identical to a plain print(sys.argv).
    print(html.escape(str(sys.argv), quote=False))
    print()
1004
def print_environ_usage():
    """Dump a list of environment variables used by CGI as HTML.

    The text is fixed: two <UL> lists of variable names written to
    standard output.  Nothing is read from the actual environment.
    """
    usage_html = """
<H3>These environment variables could have been set:</H3>
<UL>
<LI>AUTH_TYPE
<LI>CONTENT_LENGTH
<LI>CONTENT_TYPE
<LI>DATE_GMT
<LI>DATE_LOCAL
<LI>DOCUMENT_NAME
<LI>DOCUMENT_ROOT
<LI>DOCUMENT_URI
<LI>GATEWAY_INTERFACE
<LI>LAST_MODIFIED
<LI>PATH
<LI>PATH_INFO
<LI>PATH_TRANSLATED
<LI>QUERY_STRING
<LI>REMOTE_ADDR
<LI>REMOTE_HOST
<LI>REMOTE_IDENT
<LI>REMOTE_USER
<LI>REQUEST_METHOD
<LI>SCRIPT_NAME
<LI>SERVER_NAME
<LI>SERVER_PORT
<LI>SERVER_PROTOCOL
<LI>SERVER_ROOT
<LI>SERVER_SOFTWARE
</UL>
In addition, HTTP headers sent by the server may be passed in the
environment as well.  Here are some common variable names:
<UL>
<LI>HTTP_ACCEPT
<LI>HTTP_CONNECTION
<LI>HTTP_HOST
<LI>HTTP_PRAGMA
<LI>HTTP_REFERER
<LI>HTTP_USER_AGENT
</UL>
"""
    print(usage_html)
1047
1048
1049# Utilities
1050# =========
1051
def escape(s, quote=None):
    """Deprecated API; use html.escape instead.

    Issues a DeprecationWarning attributed to the caller, then returns
    *s* with "&", "<" and ">" replaced by their HTML entities.  The
    double quote character is replaced too when *quote* is true.
    """
    warn("cgi.escape is deprecated, use html.escape instead",
         DeprecationWarning, stacklevel=2)
    # "&" has to come first, otherwise the ampersands introduced by the
    # other entities would be escaped a second time.
    substitutions = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    if quote:
        substitutions.append(('"', "&quot;"))
    for raw, entity in substitutions:
        s = s.replace(raw, entity)
    return s
1062
1063
def valid_boundary(s):
    """Check whether *s* is an acceptable multipart boundary.

    *s* may be str or bytes.  It is accepted when it consists of 1 to 201
    printable ASCII characters (space through "~") and does not end with
    a space.

    Returns the match object (truthy) for an acceptable boundary and None
    otherwise.
    """
    import re
    # \Z rather than $: "$" also matches just before a trailing newline,
    # which would let a boundary such as "abc\n" slip through.
    if isinstance(s, bytes):
        _vb_pattern = br"\A[ -~]{0,200}[!-~]\Z"
    else:
        _vb_pattern = r"\A[ -~]{0,200}[!-~]\Z"
    return re.match(_vb_pattern, s)
1071
1072# Invoke mainline
1073# ===============
1074
# Run the test() diagnostic when this file is executed directly as a
# script; importing it as a module does not trigger it.
if __name__ == '__main__':
    test()
1078