"""Tests of the builder registry.

Exercises both the module-level registry that comes with the default
tree builders registered, and the ``TreeBuilderRegistry`` class on its
own with throwaway builder classes.
"""

import unittest

from bs4 import BeautifulSoup
from bs4.builder import (
    builder_registry as registry,
    HTMLParserTreeBuilder,
    TreeBuilderRegistry,
)

# The HTML5 and lxml builders may not be importable. Record their
# availability so the tests below can adjust their expectations.
try:
    from bs4.builder import HTML5TreeBuilder
    HTML5LIB_PRESENT = True
except ImportError:
    HTML5LIB_PRESENT = False

try:
    from bs4.builder import (
        LXMLTreeBuilderForXML,
        LXMLTreeBuilder,
    )
    LXML_PRESENT = True
except ImportError:
    LXML_PRESENT = False


class BuiltInRegistryTest(unittest.TestCase):
    """Check lookups against the registry holding the default builders."""

    def test_combination(self):
        # Lookups that combine a quality feature with a markup type.
        if LXML_PRESENT:
            fast_html = registry.lookup('fast', 'html')
            self.assertEqual(fast_html, LXMLTreeBuilder)

            permissive_xml = registry.lookup('permissive', 'xml')
            self.assertEqual(permissive_xml, LXMLTreeBuilderForXML)

        # This expectation does not depend on any optional builder.
        strict_html = registry.lookup('strict', 'html')
        self.assertEqual(strict_html, HTMLParserTreeBuilder)

        if HTML5LIB_PRESENT:
            html5 = registry.lookup('html5lib', 'html')
            self.assertEqual(html5, HTML5TreeBuilder)

    def test_lookup_by_markup_type(self):
        # With the lxml builders available, they are the expected
        # answer for both markup types and nothing else is checked.
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
            return

        # Without them, no XML builder is expected at all, and HTML
        # falls to the HTML5 builder if present, else to html.parser.
        self.assertEqual(registry.lookup('xml'), None)
        expected_html = (
            HTML5TreeBuilder if HTML5LIB_PRESENT else HTMLParserTreeBuilder
        )
        self.assertEqual(registry.lookup('html'), expected_html)

    def test_named_library(self):
        # Builders can be looked up by the name of their library.
        if LXML_PRESENT:
            lxml_xml = registry.lookup('lxml', 'xml')
            self.assertEqual(lxml_xml, LXMLTreeBuilderForXML)
            lxml_html = registry.lookup('lxml', 'html')
            self.assertEqual(lxml_html, LXMLTreeBuilder)

        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib'), HTML5TreeBuilder)

        self.assertEqual(
            registry.lookup('html.parser'), HTMLParserTreeBuilder)

    def test_beautifulsoup_constructor_does_lookup(self):
        # `features` may be a single string...
        BeautifulSoup("", features="html")
        # ...or a list of strings.
        BeautifulSoup("", features=["html", "fast"])

        # When no builder matches the requested feature, the
        # constructor is expected to raise ValueError.
        with self.assertRaises(ValueError):
            BeautifulSoup("", features="no-such-feature")


class RegistryTest(unittest.TestCase):
    """Check TreeBuilderRegistry behaviour using a fresh, empty registry."""

    def setUp(self):
        # Every test starts from its own empty registry.
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        # Create a stand-in builder class whose `features` attribute is
        # the given tuple, register it with this test's registry, and
        # hand the class back to the caller.
        class_name = 'Builder_' + '_'.join(feature_list)
        attributes = {'features' : feature_list}
        builder_class = type(class_name, (object,), attributes)

        self.registry.register(builder_class)
        return builder_class

    def test_register_with_no_features(self):
        featureless = self.builder_for_features()

        # A builder that advertises nothing cannot be found through a
        # feature lookup.
        self.assertEqual(self.registry.lookup('foo'), None)

        # A lookup without features still finds it, because it is the
        # only builder registered.
        self.assertEqual(self.registry.lookup(), featureless)

    def test_register_with_features_makes_lookup_succeed(self):
        foo_bar = self.builder_for_features('foo', 'bar')
        for feature in ('foo', 'bar'):
            self.assertEqual(self.registry.lookup(feature), foo_bar)

    def test_lookup_fails_when_no_builder_implements_feature(self):
        self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('baz'), None)

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        self.builder_for_features('foo')
        newest = self.builder_for_features('bar')
        self.assertEqual(self.registry.lookup(), newest)

    def test_lookup_fails_when_no_tree_builders_registered(self):
        self.assertEqual(self.registry.lookup(), None)

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        # Registration order matters here, so it is kept exactly:
        # single-feature builders surround the two that offer both
        # 'foo' and 'bar'.
        self.builder_for_features('foo')
        self.builder_for_features('bar')
        both_early = self.builder_for_features('foo', 'bar', 'baz')
        both_late = self.builder_for_features('foo', 'bar', 'quux')
        self.builder_for_features('bar')
        self.builder_for_features('foo')

        # Two builders feature 'foo' and 'bar'; the one that also has
        # 'quux' was registered later, so it is the expected result.
        self.assertEqual(self.registry.lookup('foo', 'bar'), both_late)

        # Only one builder features 'foo', 'bar', and 'baz'.
        self.assertEqual(
            self.registry.lookup('foo', 'bar', 'baz'), both_early)

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        # 'bar' and 'baz' are each available, but never on one builder.
        self.builder_for_features('foo', 'bar')
        self.builder_for_features('foo', 'baz')
        self.assertEqual(self.registry.lookup('bar', 'baz'), None)