#!/usr/bin/env python3
# Copyright © 2019, 2022 Intel Corporation
# SPDX-License-Identifier: MIT

from __future__ import annotations
from collections import OrderedDict
import copy
import io
import pathlib
import os.path
import re
import xml.etree.ElementTree as et
import typing

if typing.TYPE_CHECKING:
    class Args(typing.Protocol):

        files: typing.List[pathlib.Path]
        validate: bool
        quiet: bool


def get_filename(element: et.Element) -> str:
    """Return the element's 'filename' attribute."""
    return element.attrib['filename']

def get_name(element: et.Element) -> str:
    """Return the element's 'name' attribute."""
    return element.attrib['name']

def get_value(element: et.Element) -> int:
    """Return the element's 'value' attribute as an int (base auto-detected)."""
    return int(element.attrib['value'], 0)

def get_start(element: et.Element) -> int:
    """Return the element's 'start' attribute as an int (base auto-detected)."""
    return int(element.attrib['start'], 0)


# Scalar field types that are built into genxml and never refer to a
# separately-defined <struct>.
BASE_TYPES = {
    'address',
    'offset',
    'int',
    'uint',
    'bool',
    'float',
    'mbz',
    'mbo',
}

# Fixed-point types look like "s4.8" / "u12.4" (sign, integer bits, fraction bits).
FIXED_PATTERN = re.compile(r"(s|u)(\d+)\.(\d+)")

def is_base_type(name: str) -> bool:
    """True when *name* is a builtin scalar type or a fixed-point spec."""
    if name in BASE_TYPES:
        return True
    return FIXED_PATTERN.match(name) is not None

def add_struct_refs(items: typing.OrderedDict[str, bool], node: et.Element) -> None:
    """Collect, in document order, the non-base types referenced by fields
    under *node* (descending into <group> elements), recording each into
    *items* as a key."""
    if node.tag == 'field':
        field_type = node.attrib.get('type')
        if field_type is not None and not is_base_type(field_type):
            items[field_type] = True
        return
    if node.tag in ('struct', 'group'):
        for child in node:
            add_struct_refs(items, child)


class Struct(object):
    """Wraps a <struct> element and tracks the other structs it references."""

    def __init__(self, xml: et.Element):
        self.xml = xml
        self.name = xml.attrib['name']
        self.deps: typing.OrderedDict[str, Struct] = OrderedDict()

    def find_deps(self, struct_dict, enum_dict) -> None:
        """Populate self.deps with the Structs our fields refer to.

        Referenced names that are not structs (e.g. enums) are skipped.
        """
        referenced: typing.OrderedDict[str, bool] = OrderedDict()
        add_struct_refs(referenced, self.xml)
        for name in referenced:
            if name in struct_dict:
                self.deps[name] = struct_dict[name]

    def add_xml(self, items: typing.OrderedDict[str, et.Element]) -> None:
        """Emit our dependencies (recursively) first, then this struct,
        into *items*; duplicates are collapsed by the dict key."""
        for dep in self.deps.values():
            dep.add_xml(items)
        items[self.name] = self.xml
# ordering of the various tag attributes
GENXML_DESC = {
    'genxml'      : [ 'name', 'gen', ],
    'import'      : [ 'name', ],
    'exclude'     : [ 'name', ],
    'enum'        : [ 'name', 'value', 'prefix', ],
    'struct'      : [ 'name', 'length', ],
    'field'       : [ 'name', 'start', 'end', 'type', 'default', 'prefix', 'nonzero' ],
    'instruction' : [ 'name', 'bias', 'length', 'engine', ],
    'value'       : [ 'name', 'value', 'dont_use', ],
    'group'       : [ 'count', 'start', 'size', ],
    'register'    : [ 'name', 'length', 'num', ],
}


def node_validator(old: et.Element, new: et.Element) -> bool:
    """Compare two ElementTree Element nodes.

    There is no builtin equality method, so calling `et.Element == et.Element`
    is equivalent to calling `et.Element is et.Element`. We instead want to
    compare that the contents are the same, including the order of children
    and attributes.
    """
    # Same tag and text content.
    if old.tag != new.tag or old.text != new.text:
        return False
    # Tail whitespace is layout-only, so compare it stripped.
    if (old.tail or "").strip() != (new.tail or "").strip():
        return False
    # Attributes must match exactly, including their order.
    if list(old.attrib.items()) != list(new.attrib.items()):
        return False
    # No attributes beyond those the schema allows for this tag...
    if not set(new.attrib).issubset(GENXML_DESC[new.tag]):
        return False
    # ...and the keys must appear in the same order on both nodes.
    if list(new.attrib) != list(old.attrib):
        return False
    # Same number of children, pairwise equivalent.
    if len(old) != len(new):
        return False
    return all(node_validator(o, n) for o, n in zip(old, new))


def process_attribs(elem: et.Element) -> None:
    """Recursively drop unknown attributes and put the remaining ones into
    the canonical order defined by GENXML_DESC."""
    canonical = GENXML_DESC[elem.tag]
    kept = [(k, v) for k, v in elem.attrib.items() if k in canonical]
    kept.sort(key=lambda kv: canonical.index(kv[0]))
    elem.attrib = OrderedDict(kept)
    for child in elem:
        process_attribs(child)


def sort_xml(xml: et.ElementTree) -> None:
    """Sort a genxml tree into canonical order, in place."""
    root = xml.getroot()

    imports = xml.findall('import')

    # Enums sort alphabetically, their values numerically.
    enums = sorted(xml.findall('enum'), key=get_name)
    enum_dict: typing.Dict[str, et.Element] = {}
    for enum in enums:
        enum[:] = sorted(enum, key=get_value)
        enum_dict[enum.attrib['name']] = enum

    # Structs are a bit annoying because they can refer to each other. We
    # sort them alphabetically and then build a graph of dependencies.
    # Finally we go through the alphabetically sorted list and print out
    # dependencies first.
    structs = sorted(xml.findall('./struct'), key=get_name)
    wrapped: typing.Dict[str, Struct] = {}
    for struct in structs:
        struct[:] = sorted(struct, key=get_start)
        ws = Struct(struct)
        wrapped[ws.name] = ws

    for ws in wrapped.values():
        ws.find_deps(wrapped, enum_dict)

    sorted_structs: typing.OrderedDict[str, et.Element] = OrderedDict()
    for struct in structs:
        wrapped[struct.attrib['name']].add_xml(sorted_structs)

    instructions = sorted(xml.findall('./instruction'), key=get_name)
    for inst in instructions:
        inst[:] = sorted(inst, key=get_start)

    registers = sorted(xml.findall('./register'), key=get_name)
    for reg in registers:
        reg[:] = sorted(reg, key=get_start)

    ordered = (imports + enums + list(sorted_structs.values()) +
               instructions + registers)
    for node in ordered:
        process_attribs(node)
    root[:] = ordered


# `default_imports` documents which files should be imported for our
# genxml files. This is only useful if a genxml file does not already
# include imports.
#
# Basically, this allows the genxml_import.py tool used with the
# --import switch to know which files should be added as an import.
# (genxml_import.py uses GenXml.add_xml_imports, which relies on
# `default_imports`.)
default_imports = OrderedDict([
    ('gen40.xml', ()),
    ('gen45.xml', ('gen40.xml',)),
    ('gen50.xml', ('gen45.xml',)),
    ('gen60.xml', ('gen50.xml',)),
    ('gen70.xml', ('gen60.xml',)),
    ('gen75.xml', ('gen70.xml',)),
    ('gen80.xml', ('gen75.xml',)),
    ('gen90.xml', ('gen80.xml',)),
    ('gen110.xml', ('gen90.xml',)),
    ('gen120.xml', ('gen110.xml',)),
    ('gen125.xml', ('gen120.xml',)),
    ('gen200.xml', ('gen125.xml',)),
    ('gen200_rt.xml', ('gen125_rt.xml',)),
    ('gen300.xml', ('gen200.xml',)),
    ('gen300_rt.xml', ('gen200_rt.xml',)),
    ])
# Canonical ordering of the known genxml files (oldest generation first).
known_genxml_files = list(default_imports.keys())


def genxml_path_to_key(path):
    """Sort key for a genxml path: its position in `known_genxml_files`.

    Files not in the known list sort after all known files.
    """
    try:
        return known_genxml_files.index(path.name)
    except ValueError:
        return len(known_genxml_files)


def sort_genxml_files(files):
    """Sort a list of genxml paths in place into canonical order."""
    files.sort(key=genxml_path_to_key)

class GenXml(object):
    """A genxml file loaded as an ElementTree, with helpers to merge,
    flatten, add, and optimize genxml <import> tags."""

    def __init__(self, filename, import_xml=False, files=None):
        # `files` is the set of genxml paths loaded so far; it is shared
        # (not copied) with recursively imported GenXml instances so that
        # import cycles can be detected.
        if files is not None:
            self.files = files
        else:
            self.files = set()
        self.filename = pathlib.Path(filename)

        # Assert that the file hasn't already been loaded which would
        # indicate a loop in genxml imports, and lead to infinite
        # recursion.
        assert self.filename not in self.files

        self.files.add(self.filename)
        self.et = et.parse(self.filename)
        if import_xml:
            self.merge_imported()

    def process_imported(self, merge=False, drop_dupes=False):
        """Processes imported genxml files.

        This helper function scans imported genxml files and has two
        mutually exclusive operating modes.

        If `merge` is True, then items will be merged into the
        `self.et` data structure.

        If `drop_dupes` is True, then any item that is a duplicate to
        an item imported will be dropped from the `self.et` data
        structure. This is used by `self.optimize_xml_import` to
        shrink the size of the genxml file by reducing duplications.

        """
        assert merge != drop_dupes
        orig_elements = set(self.et.getroot())
        name_and_obj = lambda i: (get_name(i), i)
        filter_ty = lambda s: filter(lambda i: i.tag == s, orig_elements)
        filter_ty_item = lambda s: dict(map(name_and_obj, filter_ty(s)))

        # orig_by_tag stores items defined directly in the genxml
        # file. If a genxml item is defined in the genxml directly,
        # then any imported items of the same name are ignored.
        orig_by_tag = {
            'enum': filter_ty_item('enum'),
            'struct': filter_ty_item('struct'),
            'instruction': filter_ty_item('instruction'),
            'register': filter_ty_item('register'),
        }

        for item in orig_elements:
            if item.tag == 'import':
                assert 'name' in item.attrib
                filename = os.path.split(item.attrib['name'])
                # <exclude> children name imported items to skip.
                exceptions = set()
                for e in item:
                    assert e.tag == 'exclude'
                    exceptions.add(e.attrib['name'])
                # We should be careful to restrict loaded files to
                # those under the source or build trees. For now, only
                # allow siblings of the current xml file.
                assert filename[0] == '', 'Directories not allowed with import'
                filename = os.path.join(os.path.dirname(self.filename),
                                        filename[1])
                # NOTE(review): this assertion message prints the importing
                # file followed by "(unknown)" — it looks like a placeholder;
                # confirm the intended text (probably the missing file name).
                assert os.path.exists(filename), f'{self.filename} (unknown)'

                # Here we load the imported genxml file. We set
                # `import_xml` to true so that any imports in the
                # imported genxml will be merged during the loading
                # process.
                #
                # The `files` parameter is a set of files that have
                # been loaded, and it is used to prevent any cycles
                # (infinite recursion) while loading imported genxml
                # files.
                genxml = GenXml(filename, import_xml=True, files=self.files)
                imported_elements = set(genxml.et.getroot())

                # `to_add` is a set of items that were imported and
                # should be merged into the `self.et` data structure.
                # This is only used when the `merge` parameter is
                # True.
                to_add = set()
                # `to_remove` is a set of original items that can safely
                # be removed because an equivalent item is provided by
                # the import. This is only used when the `drop_dupes`
                # parameter is True.
                to_remove = set()
                for i in imported_elements:
                    if i.tag not in orig_by_tag:
                        continue
                    if i.attrib['name'] in exceptions:
                        continue
                    if i.attrib['name'] in orig_by_tag[i.tag]:
                        if merge:
                            # An item with this same name was defined
                            # in the genxml directly. Therefore we should
                            # ignore (not merge) the imported item.
                            continue
                    else:
                        if drop_dupes:
                            # This imported item has no counterpart
                            # defined directly in the genxml, so there
                            # is nothing to consider dropping.
                            continue
                    if merge:
                        to_add.add(i)
                    else:
                        assert drop_dupes
                        # Only drop the original when it is structurally
                        # identical to the imported item.
                        orig_element = orig_by_tag[i.tag][i.attrib['name']]
                        if not node_validator(i, orig_element):
                            continue
                        to_remove.add(orig_element)

                if len(to_add) > 0:
                    # Now that we have scanned through all the items
                    # in the imported genxml file, if any items were
                    # found which should be merged, we add them into
                    # our `self.et` data structure. After this it will
                    # be as if the items had been directly present in
                    # the genxml file.
                    assert len(to_remove) == 0
                    self.et.getroot().extend(list(to_add))
                    sort_xml(self.et)
                elif len(to_remove) > 0:
                    self.et.getroot()[:] = list(orig_elements - to_remove)
                    sort_xml(self.et)

    def merge_imported(self):
        """Merge imported items from genxml imports.

        Genxml <import> tags specify that elements should be brought
        in from another genxml source file. After this function is
        called, these elements will become part of the `self.et` data
        structure as if the elements had been directly included in the
        genxml file.

        Items from imported genxml files will be completely ignored if
        an item with the same name is already defined in the genxml
        file.

        """
        self.process_imported(merge=True)

    def flatten_imported(self):
        """Flattens the genxml to not include any imports

        Essentially this helper will put the `self.et` into a state
        that includes all imported items directly, and does not
        contain any <import> tags. This is used by the
        genxml_import.py with the --flatten switch to "undo" any
        genxml imports.

        """
        self.merge_imported()
        root = self.et.getroot()
        imports = root.findall('import')
        for i in imports:
            root.remove(i)

    def add_xml_imports(self):
        """Adds imports to the genxml file.

        Using the `default_imports` structure, we add imports to the
        genxml file.

        """
        # `imports` is a set of filenames currently imported by the
        # genxml.
        imports = self.et.findall('import')
        imports = set(map(lambda el: el.attrib['name'], imports))
        new_elements = []
        # Flatten a deep copy to learn the full set of names this
        # genxml (with its existing imports) already provides.
        self_flattened = copy.deepcopy(self)
        self_flattened.flatten_imported()
        old_names = { el.attrib['name'] for el in self_flattened.et.getroot() }
        for import_xml in default_imports.get(self.filename.name, tuple()):
            if import_xml in imports:
                # This genxml is already imported, so we don't need to
                # add it as an import.
                continue
            el = et.Element('import', {'name': import_xml})
            import_path = self.filename.with_name(import_xml)
            imported_genxml = GenXml(import_path, import_xml=True)
            imported_names = { el.attrib['name']
                               for el in imported_genxml.et.getroot()
                               if el.tag != 'import' }
            # Importing this genxml could add some new items. When
            # adding a genxml import, we don't want to add new items,
            # unless they were already in the current genxml. So, we
            # put them into a list of items to exclude when importing
            # the genxml.
            exclude_names = imported_names - old_names
            for n in sorted(exclude_names):
                el.append(et.Element('exclude', {'name': n}))
            new_elements.append(el)
        if len(new_elements) > 0:
            self.et.getroot().extend(new_elements)
            sort_xml(self.et)

    def optimize_xml_import(self):
        """Optimizes the genxml by dropping items that can be imported

        Scans genxml <import> tags, and loads the imported file. If
        any item in the imported file is a duplicate to an item in the
        genxml file, then it will be dropped from the `self.et` data
        structure.

        """
        self.process_imported(drop_dupes=True)

    def filter_engines(self, engines):
        """Drop instructions that are not tagged for any of *engines*.

        *engines* is a set of engine names; instructions with no
        'engine' attribute apply to all engines and are kept.
        """
        changed = False
        items = []
        for item in self.et.getroot():
            # When an instruction doesn't have the engine specified,
            # it is considered to be for all engines. Otherwise, we
            # check to see if it's tagged for the engines requested.
            if item.tag == 'instruction' and 'engine' in item.attrib:
                i_engines = set(item.attrib["engine"].split('|'))
                if not (i_engines & engines):
                    # Drop this instruction because it doesn't support
                    # the requested engine types.
                    changed = True
                    continue
            items.append(item)
        if changed:
            self.et.getroot()[:] = items

    def filter_symbols(self, symbol_list):
        """Keep only instructions/structs/registers named in *symbol_list*;
        all other item kinds (enums, imports, ...) are always kept."""
        # Built as a dict but only membership is used (acts as a set).
        symbols_allowed = {}
        for sym in symbol_list:
            symbols_allowed[sym] = sym

        changed = False
        items = []
        for item in self.et.getroot():
            if item.tag in ('instruction', 'struct', 'register') and \
               item.attrib['name'] not in symbols_allowed:
                # Drop the item from the tree
                changed = True
                continue
            items.append(item)
        if changed:
            self.et.getroot()[:] = items

    def sort(self):
        """Sort `self.et` into canonical genxml order, in place."""
        sort_xml(self.et)

    def sorted_copy(self):
        """Return a sorted deep copy of this GenXml; self is unchanged."""
        clone = copy.deepcopy(self)
        clone.sort()
        return clone

    def is_equivalent_xml(self, other):
        """Structural equality of the two trees (order-sensitive)."""
        if len(self.et.getroot()) != len(other.et.getroot()):
            return False
        return all(node_validator(old, new)
                   for old, new in zip(self.et.getroot(), other.et.getroot()))

    def write_file(self):
        """Write `self.et` back to `self.filename`.

        Skips the write when the on-disk file is already equivalent;
        writes via a .tmp sibling then renames, so the update is atomic.
        """
        try:
            old_genxml = GenXml(self.filename)
            if self.is_equivalent_xml(old_genxml):
                return
        except Exception:
            # Best effort: if the existing file is unreadable/invalid,
            # fall through and rewrite it.
            pass

        b_io = io.BytesIO()
        # et.indent requires Python 3.9+.
        et.indent(self.et, space=' ')
        self.et.write(b_io, encoding="utf-8", xml_declaration=True)
        b_io.write(b'\n')

        tmp = self.filename.with_suffix(f'{self.filename.suffix}.tmp')
        tmp.write_bytes(b_io.getvalue())
        tmp.replace(self.filename)