# Copyright (C) 2020 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A library containing functions for diffing XML elements."""
import textwrap
from typing import Any, Callable, Dict, Set
import xml.etree.ElementTree as ET
import dataclasses

Element = ET.Element

_INDENT = (' ' * 2)


@dataclasses.dataclass
class Change:
  """A value that differs between the two sides of a comparison.

  Attributes:
    value_from: the value on the first (base) side.
    value_to: the value on the second (compared) side.
  """
  value_from: str
  value_to: str

  def __repr__(self):
    return f'{self.value_from} -> {self.value_to}'


@dataclasses.dataclass
class ChangeMap:
  """A collection of changes broken down by added, removed and modified.

  An instance is falsy when all three dictionaries are empty.

  Attributes:
    added: A dictionary of string identifiers to the added string.
    removed: A dictionary of string identifiers to the removed string.
    modified: A dictionary of string identifiers to the changed object.
  """
  added: Dict[str, str] = dataclasses.field(default_factory=dict)
  removed: Dict[str, str] = dataclasses.field(default_factory=dict)
  modified: Dict[str, Any] = dataclasses.field(default_factory=dict)

  def __repr__(self):
    """Render the non-empty sections as an indented, multi-line report."""
    parts = []
    if self.added:
      parts.append('Added:\n')
      parts.extend(
          textwrap.indent(str(value) + '\n', _INDENT)
          for value in self.added.values())
    if self.removed:
      parts.append('Removed:\n')
      parts.extend(
          textwrap.indent(str(value) + '\n', _INDENT)
          for value in self.removed.values())
    if self.modified:
      parts.append('Modified:\n')
      for name, value in self.modified.items():
        parts.append(textwrap.indent(name + ':\n', _INDENT))
        # The changed object is indented one level deeper than its name.
        parts.append(textwrap.indent(str(value) + '\n', _INDENT * 2))
    return ''.join(parts)

  def __bool__(self):
    return bool(self.added or self.removed or self.modified)


def element_string(e: Element) -> str:
  """Serialize an XML element to a string without surrounding whitespace."""
  return ET.tostring(e).decode(encoding='UTF-8').strip()


def attribute_changes(e1: Element, e2: Element,
                      ignored_attrs: Set[str]) -> ChangeMap:
  """Get the changes in attributes between two XML elements.

  An attribute counts as added or removed only when it is absent from one
  of the elements. An attribute that is present on both sides with different
  values, including an empty string on either side, counts as modified.

  Arguments:
    e1: the first xml element.
    e2: the second xml element.
    ignored_attrs: a set of attribute names to ignore changes.

  Returns:
    A ChangeMap of attribute changes. Keyed by attribute name, with entries
    inserted in sorted attribute-name order.
  """
  changes = ChangeMap()
  # Sorted so that the insertion order of the result (and therefore its
  # printed form) does not depend on set iteration order.
  for attr in sorted(set(e1.keys()) | set(e2.keys())):
    if attr in ignored_attrs:
      continue
    a1 = e1.get(attr)
    a2 = e2.get(attr)
    if a1 == a2:
      continue
    # Element.get() returns None only for a missing attribute; an empty
    # string means the attribute is present, so compare against None rather
    # than relying on truthiness.
    if a1 is None:
      changes.added[attr] = a2
    elif a2 is None:
      changes.removed[attr] = a1
    else:
      changes.modified[attr] = Change(value_from=a1, value_to=a2)
  return changes


def compare_subelements(
    tag: str,
    p1: Element,
    p2: Element,
    ignored_attrs: Set[str],
    key_fn: Callable[[Element], str],
    diff_fn: Callable[[Element, Element, Set[str]], Any]) -> ChangeMap:
  """Get the changes between subelements of two parent elements.

  Subelements of p1 and p2 are paired by the key that key_fn returns. If
  several subelements of p1 share a key, only the last one is compared. A
  subelement of p2 whose key was already paired is reported as added.

  Arguments:
    tag: tag name for children element.
    p1: the base parent xml element.
    p2: the parent xml element to compare
    ignored_attrs: a set of attribute names to ignore changes.
    key_fn: Function that takes a subelement and returns a key
    diff_fn: Function that take two subelements and a set of ignored
      attributes, returns the differences

  Returns:
    A ChangeMap object of the changes.
  """
  changes = ChangeMap()
  # Subelements of p1 that have not been paired with one from p2 yet.
  unmatched = {key_fn(child): child for child in p1.findall(tag)}

  for e2 in p2.findall(tag):
    key = key_fn(e2)
    e1 = unmatched.pop(key, None)
    # Compare against None explicitly: an Element without children is falsy.
    if e1 is None:
      changes.added[key] = element_string(e2)
      continue
    echange = diff_fn(e1, e2, ignored_attrs)
    if echange:
      changes.modified[key] = echange

  # Whatever is left in p1 has no counterpart in p2.
  for name, e1 in unmatched.items():
    changes.removed[name] = element_string(e1)

  return changes