• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python3
2
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16
17import sys
18import atheris
19
20with atheris.instrument_imports():
21  import logging
22  import warnings
23  from bs4 import BeautifulSoup
24
25
# Resolve HTMLParseError so the harness can name it in an `except` clause on
# every interpreter:
#   * Python 3.0-3.4 exposes it from html.parser.
#   * Python 2 exposes it from the HTMLParser module.
#   * Python 3.5+ removed it entirely; since it can never be thrown there, a
#     local placeholder class is enough to keep the `except` clause valid.
try:
  from html.parser import HTMLParseError
except ImportError:
  try:
    import HTMLParser
    HTMLParseError = HTMLParser.HTMLParseError
  except ImportError:

    class HTMLParseError(Exception):
      """Placeholder for the HTMLParseError removed in Python 3.5."""
35
36
@atheris.instrument_func
def TestOneInput(data):
  """Feed one fuzzer-generated input to BeautifulSoup.

  The first byte of `data` selects which parser backend to exercise; the
  remaining bytes are passed to BeautifulSoup as the markup. If the soup is
  built successfully, the tree is walked with `find_all(True)` and rendered
  with `prettify()`.

  Args:
    data: Raw bytes supplied by the fuzzing engine.

  Returns:
    None. Empty input, and markup rejected with HTMLParseError or ValueError,
    are skipped silently; any other exception propagates to the fuzzer.
  """
  if not data:
    return

  parsers = ('lxml-xml', 'html5lib', 'html.parser', 'lxml')
  # Indexing bytes yields an int in Python 3, so the selector byte can be
  # used directly; no int() conversion or ValueError handling is needed.
  parser = parsers[data[0] % len(parsers)]

  try:
    soup = BeautifulSoup(data[1:], features=parser)
  except (HTMLParseError, ValueError):
    # These are treated as ordinary rejections of the input, not findings.
    return

  # Exercise tree traversal and serialisation on the parsed document.
  list(soup.find_all(True))
  soup.prettify()
58
59
def main():
  """Silence library noise, then hand control to the Atheris fuzzing loop."""
  # Suppress warnings and all log records up to CRITICAL so that output from
  # the code under test does not flood the fuzzer's own output.
  warnings.filterwarnings('ignore')
  logging.disable(logging.CRITICAL)

  atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
  atheris.Fuzz()


# Only start fuzzing when executed as a script, not when imported.
if __name__ == "__main__":
  main()
69