1#!/usr/bin/env python3 2# 3# Copyright (C) 2023 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# 17# Sample Usage: 18# $ python3 merge_orderfile.py --order-files %../orderfiles/test 19# 20# Try '-h' for a full list of command line arguments. 21# 22# Note: We allow three formats: Folder, File, and CSV 23# As lower end devices require the most help, you can give 24# their order files a higher weight. 25# You can only provide weights if you choose File format. 26# For example, an order file weight of 4 means the edges 27# in the graph will be multiplied by 4. 28# CSV and Folder assume all files have a weight of 1. 29# An example file can be found at ../test/merge-test/merge.txt 30 31from bitarray import bitarray 32import argparse 33import graphviz 34 35import orderfile_utils 36 37class Vertex(object): 38 """Vertex (symbol) in the graph.""" 39 def __init__(self, name: str) -> None: 40 self.name = name 41 self.count = 0 42 43 def __eq__(self, other: object) -> bool: 44 if isinstance(other, Vertex): 45 return self.name == other.name 46 return False 47 48 def __hash__(self) -> int: 49 return hash(self.name) 50 51 def __str__(self) -> str: 52 return f'{self.name}({self.count})' 53 54 def appears(self) -> None: 55 self.count += 1 56 57class Graph(object): 58 """Graph representation of the order files.""" 59 def __init__(self) -> None: 60 self.graph = {} 61 self.reverse = {} 62 self.vertices = {} 63 64 def __str__(self) -> str: 65 string = "" 66 for (f_symbol, value) in self.graph.items(): 67 for (t_symbol, weight) in self.graph[f_symbol].items(): 68 string += f'{f_symbol} --{weight}--> {t_symbol}\n' 69 return string 70 71 def addVertex(self, symbol: str) -> None: 72 if symbol not in self.vertices: 73 v = Vertex(symbol) 74 self.vertices[symbol] = v 75 self.graph[v] = {} 76 self.reverse[v] = {} 77 78 self.vertices[symbol].appears() 79 80 def addEdge(self, from_symbol: str, to_symbol: str) -> None: 81 """Add an edge (it represents two symbols are consecutive).""" 82 from_vertex = self.vertices.get(from_symbol) 83 to_vertex = self.vertices.get(to_symbol) 84 85 if from_vertex is None: 86 raise RuntimeError(f"Symbol {from_symbol} is not in graph") 87 88 if to_vertex is None: 89 raise RuntimeError(f"Symbol {to_symbol} is not in graph") 90 91 if to_vertex not in self.graph[from_vertex]: 92 self.graph[from_vertex][to_vertex] = 0 93 self.reverse[to_vertex][from_vertex] = 0 94 95 self.graph[from_vertex][to_vertex] += 1 96 self.reverse[to_vertex][from_vertex] += 1 97 98 def removeEdgeCompletely(self, from_symbol: str, to_symbol: str) -> None: 99 """Remove the edge from the graph""" 100 from_vertex = self.vertices.get(from_symbol) 101 to_vertex = self.vertices.get(to_symbol) 102 103 if from_vertex is None: 104 raise RuntimeError(f"Symbol {from_symbol} is not in graph") 105 106 if to_vertex is None: 107 raise RuntimeError(f"Symbol {to_symbol} is not in graph") 108 109 del self.graph[from_vertex][to_vertex] 110 del self.reverse[to_vertex][from_vertex] 111 112 to_vertex.count -= 1 113 114 def checkVertex(self, symbol: str) -> bool: 115 return symbol in self.vertices 116 117 def checkEdge(self, from_symbol: str, to_symbol: str) -> bool: 118 if not self.checkVertex(from_symbol): 119 return False 120 121 if not self.checkVertex(to_symbol): 122 return False 123 124 from_vertex = self.vertices.get(from_symbol) 125 to_vertex = self.vertices.get(to_symbol) 126 127 if from_vertex not in self.graph: 128 return False 129 130 return to_vertex in self.graph[from_vertex] 131 132 def checkEdgeWeight(self, from_symbol: str, to_symbol: str, weight: str) -> bool: 133 if not self.checkEdge(from_symbol, to_symbol): 134 return False 135 136 from_vertex = self.vertices.get(from_symbol) 137 to_vertex = self.vertices.get(to_symbol) 138 139 return self.graph[from_vertex][to_vertex] == weight 140 141 def getOutEdges(self, symbol: str): 142 """Graph the out edges for a vertex.""" 143 out_edges = [] 144 vertex = self.vertices.get(symbol) 145 if vertex is None: 146 raise RuntimeError(f"Symbol {symbol} is not in graph") 147 148 for (key, value) in self.graph[vertex].items(): 149 out_edges.append((key, value)) 150 151 return out_edges 152 153 def getInEdges(self, symbol: str): 154 """Graph the in edges for a vertex.""" 155 in_edges = [] 156 vertex = self.vertices.get(symbol) 157 if vertex is None: 158 raise RuntimeError(f"Symbol {symbol} is not in graph") 159 160 for (key, value) in self.reverse[vertex].items(): 161 in_edges.append((key, value)) 162 163 return in_edges 164 165 def getRoots(self, reverse=False) -> list[str]: 166 """Get the roots of the graph (Vertex with no in edges).""" 167 roots = [] 168 for (symbol,_) in self.vertices.items(): 169 if not reverse: 170 if len(self.getInEdges(symbol)) == 0: 171 roots.append(symbol) 172 else: 173 # If you want the reverse (vertex with no out edges) 174 if len(self.getOutEdges(symbol)) == 0: 175 roots.append(symbol) 176 177 return roots 178 179 def __cyclesUtil(self, vertex: Vertex) -> None: 180 self.visited.add(vertex) 181 self.curr_search.append(vertex) 182 183 for (out_vertex, _) in self.graph[vertex].items(): 184 # If vertex already appeared in current depth search, we have a backedge 185 if out_vertex in self.curr_search: 186 # We save save all vertices in the cycle because an edge from the cycle will be removed 187 index = self.curr_search.index(out_vertex) 188 temp_lst = self.curr_search[index:] 189 self.cycles.append(temp_lst) 190 # If vertex visited before in a previous search, we do not need to search from it again 191 elif out_vertex not in self.visited: 192 self.__cyclesUtil(out_vertex) 193 194 self.curr_search.pop() 195 196 def getCycles(self) -> list[list[tuple[str]]]: 197 self.visited = set() 198 self.curr_search = [] 199 self.cycles = [] 200 lst = [] 201 202 for (_, vertex) in self.vertices.items(): 203 if vertex not in self.visited: 204 self.__cyclesUtil(vertex) 205 206 return self.cycles 207 208 # Get immediate dominator for each vertex 209 def getDominators(self, post=False): 210 # Create a bitarray for each vertex to showcase which vertices 211 # are dominators 212 num_vertices = len(self.vertices) 213 dominators = {} 214 mapping = [] 215 for (_, vertex) in self.vertices.items(): 216 mapping.append(vertex) 217 ba = bitarray(num_vertices) 218 ba.setall(True) 219 dominators[vertex] = ba 220 221 # Add the root vertices 222 stack = [] 223 roots = self.getRoots(post) 224 for root in roots: 225 stack.append((None, self.vertices[root])) 226 227 while len(stack) != 0: 228 (parent, child) = stack.pop() 229 230 # If no parent, you have no dominators from above 231 # If you have a parent, your dominations is the common dominators 232 # between all parents 233 if parent is None: 234 dominators[child].setall(False) 235 else: 236 dominators[child] &= dominators[parent] 237 238 # You are dominator of yourself 239 index = mapping.index(child) 240 dominators[child][index] = True 241 if not post: 242 for (out_vertex,_) in self.graph[child].items(): 243 stack.append((child, out_vertex)) 244 else: 245 for (out_vertex,_) in self.reverse[child].items(): 246 stack.append((child, out_vertex)) 247 248 for (vertex, ba) in dominators.items(): 249 # If no Trues in bitarray, you have no immediate dominator 250 # because you are a root vertex. Else, you can find the 251 # most left True vertex excluding yourself 252 index = mapping.index(vertex) 253 ba[index] = False 254 if True not in ba: 255 dominators[vertex] = None 256 else: 257 # Due to reverse, this is the actual index in the initial bitarray 258 dominator_index = ba.index(True) 259 dominators[vertex] = mapping[dominator_index] 260 261 return dominators 262 263 def __printOrderUtil(self, vertex): 264 # If already visit, we do not need to get order 265 if vertex in self.visited: 266 return 267 268 self.order.append(vertex) 269 self.visited.add(vertex) 270 271 # Get out edges and sort them based on their weightage 272 out_edges = self.getOutEdges(vertex.name) 273 out_edges.sort(key = lambda x: x[1], reverse=True) 274 275 # We continue dfs based on the largest weight 276 for (out, _) in out_edges: 277 self.__printOrderUtil(out) 278 279 def printOrder(self, output): 280 self.order = [] 281 self.visited = set() 282 stack = [] 283 284 # Create an order using DFS from the root 285 for root in self.getRoots(): 286 self.__printOrderUtil(self.vertices[root]) 287 288 # Write the order to a file 289 with open(output, "w") as f: 290 for vertex in self.order: 291 f.write(f"{vertex.name}\n") 292 293 def exportGraph(self, output: str) -> None: 294 """Export graph as a dot file and pdf file.""" 295 dot = graphviz.Digraph(comment='Graph Representation of Orderfile') 296 297 for (from_vertex, to_vertices) in self.graph.items(): 298 for (to_vertex, weight) in to_vertices.items(): 299 dot.edge(from_vertex.__str__(), to_vertex.__str__(), label=str(weight)) 300 301 dot.render(filename=output) 302 303 304def parse_args() -> argparse.Namespace: 305 """Parses and returns command line arguments.""" 306 307 parser = argparse.ArgumentParser(prog="merge_orderfile", 308 description="Merge Order Files") 309 310 parser.add_argument( 311 "--order-files", 312 required=True, 313 help="A collection of order files that need to be merged together." 314 "Format: A file-per-line file with @, a folder with ^, or comma separated values within a quotation." 315 "For example, you can say @file.txt, ^path/to/folder or '1.orderfile,2.orderfile'.") 316 317 parser.add_argument( 318 "--output", 319 default="default.orderfile", 320 help="Provide the output file name for the order file. Default Name: default.orderfile") 321 322 parser.add_argument( 323 "--graph-image", 324 help="Provide the output image name for the graph representation of the order files.") 325 326 return parser.parse_args() 327 328def removeCycles(graph: Graph) -> None: 329 # Remove cycles created by combining order files 330 for cycleList in graph.getCycles(): 331 # Get the sum of in edge weights for all vertices in the cycle 332 # We exclude the cycle edges from the calculation 333 # For example, cycle = [a,b,c] where cycle_edges=[a->b, b->c, c->a] 334 # in_edges(a) = [main, c] 335 # in_edges(b) = [a] 336 # in_edges(c) = [b] 337 # 338 # Excluding cycle edges: 339 # in_edges(a) = [main] = 1 340 # in_edges(b) = [] = 0 341 # in_edges(c) = [] = 0 342 inner_edges = [graph.getInEdges(vertex.name) for vertex in cycleList] 343 inner_weights = [] 344 for inner_edge in inner_edges: 345 total = 0 346 for edge in inner_edge: 347 if edge[0] not in cycleList: 348 total += edge[1] 349 inner_weights.append(total) 350 351 # We remove the cycle edge that leads to the highest sum of in-edges for a vertex 352 # because the vertex has other options for ordering. 353 # In the above example, we remove c->a 354 max_inner_weight = max(inner_weights) 355 index = inner_weights.index(max_inner_weight) 356 prev = index - 1 357 if prev < 0: 358 prev = len(inner_weights) - 1 359 to_vertex = cycleList[index] 360 from_vertex = cycleList[prev] 361 362 graph.removeEdgeCompletely(from_vertex.name, to_vertex.name) 363 364def addSymbolsToGraph(graph: Graph, order: list[str], weight: int = 1) -> None: 365 prev_symbol = None 366 for symbol in order: 367 graph.addVertex(symbol) 368 369 if prev_symbol is not None: 370 for i in range(weight): 371 graph.addEdge(prev_symbol, symbol) 372 373 prev_symbol = symbol 374 375def createGraph(files: list[str]) -> Graph: 376 graph = Graph() 377 378 # Create graph representation based on combining the order files 379 for (orderfile, weight) in files: 380 with open(orderfile, "r", encoding="utf-8") as f: 381 lst = [] 382 for line in f: 383 line = line.strip() 384 lst.append(line) 385 386 addSymbolsToGraph(graph, lst, weight) 387 388 return graph 389 390def main() -> None: 391 args = parse_args() 392 393 files = orderfile_utils.parse_merge_list(args.order_files) 394 graph = createGraph(files) 395 396 # Assert no cycles after removing them 397 removeCycles(graph) 398 assert(len(graph.getCycles()) == 0) 399 400 # Create an image of the graph representation 401 if args.graph_image: 402 graph.exportGraph(args.graph_image) 403 404 # Create order file from the graph structure 405 graph.printOrder(args.output) 406 407if __name__ == '__main__': 408 main() 409