• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Tests of the builder registry."""
2
3import unittest
4
5from bs4 import BeautifulSoup
6from bs4.builder import (
7    builder_registry as registry,
8    HTMLParserTreeBuilder,
9    TreeBuilderRegistry,
10)
11
12try:
13    from bs4.builder import HTML5TreeBuilder
14    HTML5LIB_PRESENT = True
15except ImportError:
16    HTML5LIB_PRESENT = False
17
18try:
19    from bs4.builder import (
20        LXMLTreeBuilderForXML,
21        LXMLTreeBuilder,
22        )
23    LXML_PRESENT = True
24except ImportError:
25    LXML_PRESENT = False
26
27
class BuiltInRegistryTest(unittest.TestCase):
    """Exercise the shared registry as populated by the default builders.

    Checks that involve lxml or html5lib only run when the matching
    builder classes could be imported.
    """

    def test_combination(self):
        """A pair of features resolves to the expected builder class."""
        if LXML_PRESENT:
            fast_html = registry.lookup('fast', 'html')
            self.assertEqual(fast_html, LXMLTreeBuilder)

            permissive_xml = registry.lookup('permissive', 'xml')
            self.assertEqual(permissive_xml, LXMLTreeBuilderForXML)

        strict_html = registry.lookup('strict', 'html')
        self.assertEqual(strict_html, HTMLParserTreeBuilder)

        if HTML5LIB_PRESENT:
            html5_html = registry.lookup('html5lib', 'html')
            self.assertEqual(html5_html, HTML5TreeBuilder)

    def test_lookup_by_markup_type(self):
        """Looking up a bare markup type depends on the installed parsers."""
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
            return

        # Without lxml nothing is expected to answer for 'xml'.
        self.assertEqual(registry.lookup('xml'), None)

        # The conditional expression only touches HTML5TreeBuilder when
        # its import succeeded.
        expected_html = (
            HTML5TreeBuilder if HTML5LIB_PRESENT else HTMLParserTreeBuilder
        )
        self.assertEqual(registry.lookup('html'), expected_html)

    def test_named_library(self):
        """A parser library's own name works as a lookup feature."""
        if LXML_PRESENT:
            lxml_cases = (
                ('xml', LXMLTreeBuilderForXML),
                ('html', LXMLTreeBuilder),
            )
            for markup_type, expected in lxml_cases:
                self.assertEqual(registry.lookup('lxml', markup_type),
                                 expected)

        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib'), HTML5TreeBuilder)

        self.assertEqual(registry.lookup('html.parser'),
                         HTMLParserTreeBuilder)

    def test_beautifulsoup_constructor_does_lookup(self):
        """The BeautifulSoup constructor resolves its ``features`` argument."""
        # A single feature string is accepted...
        BeautifulSoup("", features="html")
        # ...and so is a list of feature strings.
        BeautifulSoup("", features=["html", "fast"])

        # A feature that no builder provides is reported as a ValueError.
        with self.assertRaises(ValueError):
            BeautifulSoup("", features="no-such-feature")
79
class RegistryTest(unittest.TestCase):
    """Test the TreeBuilderRegistry class in general.

    Each test gets its own TreeBuilderRegistry instance and fills it with
    throwaway builder classes made by builder_for_features().
    """

    def setUp(self):
        # A new registry per test, so registrations made by one test are
        # not visible to the next.
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        """Create a stub builder class, register it, and return it.

        :param feature_list: Feature names the stub should advertise.
            They are stored (as a tuple) in the class's ``features``
            attribute and joined with underscores to build its name.
        :return: The newly created class, already registered with
            ``self.registry``.
        """
        cls = type('Builder_' + '_'.join(feature_list),
                   (object,), {'features': feature_list})

        self.registry.register(cls)
        return cls

    def test_register_with_no_features(self):
        builder = self.builder_for_features()

        # Since the builder advertises no features, you can't find it
        # by looking up features.
        self.assertIsNone(self.registry.lookup('foo'))

        # But you can find it by doing a lookup with no features, if
        # this happens to be the only registered builder.
        self.assertEqual(self.registry.lookup(), builder)

    def test_register_with_features_makes_lookup_succeed(self):
        builder = self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('foo'), builder)
        self.assertEqual(self.registry.lookup('bar'), builder)

    def test_lookup_fails_when_no_builder_implements_feature(self):
        # Only the registration matters here; the class is never compared.
        self.builder_for_features('foo', 'bar')
        self.assertIsNone(self.registry.lookup('baz'))

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        self.builder_for_features('foo')
        most_recent = self.builder_for_features('bar')
        self.assertEqual(self.registry.lookup(), most_recent)

    def test_lookup_fails_when_no_tree_builders_registered(self):
        self.assertIsNone(self.registry.lookup())

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        # Registration order is the point of this test, so the sequence
        # of calls below must not be rearranged.

        # Builders offering only one of the two features, registered
        # before the full matches.
        self.builder_for_features('foo')
        self.builder_for_features('bar')

        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
        has_both_late = self.builder_for_features('foo', 'bar', 'quux')

        # More single-feature builders, registered after the full
        # matches; being newer must not make them win.
        self.builder_for_features('bar')
        self.builder_for_features('foo')

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        self.assertEqual(self.registry.lookup('foo', 'bar'),
                         has_both_late)

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
                         has_both_early)

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        # 'bar' and 'baz' are each available, but never on the same builder.
        self.builder_for_features('foo', 'bar')
        self.builder_for_features('foo', 'baz')
        self.assertIsNone(self.registry.lookup('bar', 'baz'))
142