• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2023 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# Sample Usage:
18# $ python3 merge_orderfile.py --order-files %../orderfiles/test
19#
20# Try '-h' for a full list of command line arguments.
21#
22# Note: We allow three formats: Folder, File, and CSV
23# As lower end devices require the most help, you can give
24# their order files a higher weight.
25# You can only provide weights if you choose File format.
26# For example, an order file weight of 4 means the edges
27# in the graph will be multiplied by 4.
28# CSV and Folder assume all files have a weight of 1.
29# An example file can be found at ../test/merge-test/merge.txt
30
31from bitarray import bitarray
32import argparse
33import graphviz
34
35import orderfile_utils
36
37class Vertex(object):
38    """Vertex (symbol) in the graph."""
39    def __init__(self, name: str) -> None:
40        self.name = name
41        self.count = 0
42
43    def __eq__(self, other: object) -> bool:
44        if isinstance(other, Vertex):
45            return self.name == other.name
46        return False
47
48    def __hash__(self) -> int:
49        return hash(self.name)
50
51    def __str__(self) -> str:
52        return f'{self.name}({self.count})'
53
54    def appears(self) -> None:
55        self.count += 1
56
57class Graph(object):
58    """Graph representation of the order files."""
59    def __init__(self) -> None:
60        self.graph = {}
61        self.reverse = {}
62        self.vertices = {}
63
64    def __str__(self) -> str:
65        string = ""
66        for (f_symbol, value) in self.graph.items():
67            for (t_symbol, weight) in self.graph[f_symbol].items():
68                string += f'{f_symbol} --{weight}--> {t_symbol}\n'
69        return string
70
71    def addVertex(self, symbol: str) -> None:
72        if symbol not in self.vertices:
73            v = Vertex(symbol)
74            self.vertices[symbol] = v
75            self.graph[v] = {}
76            self.reverse[v] = {}
77
78        self.vertices[symbol].appears()
79
80    def addEdge(self, from_symbol: str, to_symbol: str) -> None:
81        """Add an edge (it represents two symbols are consecutive)."""
82        from_vertex = self.vertices.get(from_symbol)
83        to_vertex = self.vertices.get(to_symbol)
84
85        if from_vertex is None:
86            raise RuntimeError(f"Symbol {from_symbol} is not in graph")
87
88        if to_vertex is None:
89            raise RuntimeError(f"Symbol {to_symbol} is not in graph")
90
91        if to_vertex not in self.graph[from_vertex]:
92            self.graph[from_vertex][to_vertex] = 0
93            self.reverse[to_vertex][from_vertex] = 0
94
95        self.graph[from_vertex][to_vertex] += 1
96        self.reverse[to_vertex][from_vertex] += 1
97
98    def removeEdgeCompletely(self, from_symbol: str, to_symbol: str) -> None:
99        """Remove the edge from the graph"""
100        from_vertex = self.vertices.get(from_symbol)
101        to_vertex =  self.vertices.get(to_symbol)
102
103        if from_vertex is None:
104            raise RuntimeError(f"Symbol {from_symbol} is not in graph")
105
106        if to_vertex is None:
107            raise RuntimeError(f"Symbol {to_symbol} is not in graph")
108
109        del self.graph[from_vertex][to_vertex]
110        del self.reverse[to_vertex][from_vertex]
111
112        to_vertex.count -= 1
113
114    def checkVertex(self, symbol: str) -> bool:
115        return symbol in self.vertices
116
117    def checkEdge(self, from_symbol: str, to_symbol: str) -> bool:
118        if not self.checkVertex(from_symbol):
119            return False
120
121        if not self.checkVertex(to_symbol):
122            return False
123
124        from_vertex = self.vertices.get(from_symbol)
125        to_vertex =  self.vertices.get(to_symbol)
126
127        if from_vertex not in self.graph:
128            return False
129
130        return to_vertex in self.graph[from_vertex]
131
132    def checkEdgeWeight(self, from_symbol: str, to_symbol: str, weight: str) -> bool:
133        if not self.checkEdge(from_symbol, to_symbol):
134            return False
135
136        from_vertex = self.vertices.get(from_symbol)
137        to_vertex =  self.vertices.get(to_symbol)
138
139        return self.graph[from_vertex][to_vertex] == weight
140
141    def getOutEdges(self, symbol: str):
142        """Graph the out edges for a vertex."""
143        out_edges = []
144        vertex = self.vertices.get(symbol)
145        if vertex is None:
146            raise RuntimeError(f"Symbol {symbol} is not in graph")
147
148        for (key, value) in self.graph[vertex].items():
149            out_edges.append((key, value))
150
151        return out_edges
152
153    def getInEdges(self, symbol: str):
154        """Graph the in edges for a vertex."""
155        in_edges = []
156        vertex = self.vertices.get(symbol)
157        if vertex is None:
158            raise RuntimeError(f"Symbol {symbol} is not in graph")
159
160        for (key, value) in self.reverse[vertex].items():
161            in_edges.append((key, value))
162
163        return in_edges
164
165    def getRoots(self, reverse=False) -> list[str]:
166        """Get the roots of the graph (Vertex with no in edges)."""
167        roots = []
168        for (symbol,_) in self.vertices.items():
169            if not reverse:
170                if len(self.getInEdges(symbol)) == 0:
171                    roots.append(symbol)
172            else:
173                # If you want the reverse (vertex with no out edges)
174                if len(self.getOutEdges(symbol)) == 0:
175                    roots.append(symbol)
176
177        return roots
178
179    def __cyclesUtil(self, vertex: Vertex) -> None:
180        self.visited.add(vertex)
181        self.curr_search.append(vertex)
182
183        for (out_vertex, _) in self.graph[vertex].items():
184            # If vertex already appeared in current depth search, we have a backedge
185            if out_vertex in self.curr_search:
186                # We save save all vertices in the cycle because an edge from the cycle will be removed
187                index = self.curr_search.index(out_vertex)
188                temp_lst = self.curr_search[index:]
189                self.cycles.append(temp_lst)
190            # If vertex visited before in a previous search, we do not need to search from it again
191            elif out_vertex not in self.visited:
192                self.__cyclesUtil(out_vertex)
193
194        self.curr_search.pop()
195
196    def getCycles(self) -> list[list[tuple[str]]]:
197        self.visited = set()
198        self.curr_search = []
199        self.cycles = []
200        lst = []
201
202        for (_, vertex) in self.vertices.items():
203            if vertex not in self.visited:
204                self.__cyclesUtil(vertex)
205
206        return self.cycles
207
208    # Get immediate dominator for each vertex
209    def getDominators(self, post=False):
210        # Create a bitarray for each vertex to showcase which vertices
211        # are dominators
212        num_vertices = len(self.vertices)
213        dominators = {}
214        mapping = []
215        for (_, vertex) in self.vertices.items():
216            mapping.append(vertex)
217            ba = bitarray(num_vertices)
218            ba.setall(True)
219            dominators[vertex] = ba
220
221        # Add the root vertices
222        stack = []
223        roots = self.getRoots(post)
224        for root in roots:
225            stack.append((None, self.vertices[root]))
226
227        while len(stack) != 0:
228            (parent, child) = stack.pop()
229
230            # If no parent, you have no dominators from above
231            # If you have a parent, your dominations is the common dominators
232            # between all parents
233            if parent is None:
234                dominators[child].setall(False)
235            else:
236                dominators[child] &= dominators[parent]
237
238            # You are dominator of yourself
239            index = mapping.index(child)
240            dominators[child][index] = True
241            if not post:
242                for (out_vertex,_) in self.graph[child].items():
243                    stack.append((child, out_vertex))
244            else:
245                for (out_vertex,_) in self.reverse[child].items():
246                    stack.append((child, out_vertex))
247
248        for (vertex, ba) in dominators.items():
249            # If no Trues in bitarray, you have no immediate dominator
250            # because you are a root vertex. Else, you can find the
251            # most left True vertex excluding yourself
252            index = mapping.index(vertex)
253            ba[index] = False
254            if True not in ba:
255                dominators[vertex] = None
256            else:
257                # Due to reverse, this is the actual index in the initial bitarray
258                dominator_index = ba.index(True)
259                dominators[vertex] = mapping[dominator_index]
260
261        return dominators
262
263    def __printOrderUtil(self, vertex):
264        # If already visit, we do not need to get order
265        if vertex in self.visited:
266            return
267
268        self.order.append(vertex)
269        self.visited.add(vertex)
270
271        # Get out edges and sort them based on their weightage
272        out_edges = self.getOutEdges(vertex.name)
273        out_edges.sort(key = lambda x: x[1], reverse=True)
274
275        # We continue dfs based on the largest weight
276        for (out, _) in out_edges:
277            self.__printOrderUtil(out)
278
279    def printOrder(self, output):
280        self.order = []
281        self.visited = set()
282        stack = []
283
284        # Create an order using DFS from the root
285        for root in self.getRoots():
286            self.__printOrderUtil(self.vertices[root])
287
288        # Write the order to a file
289        with open(output, "w") as f:
290            for vertex in self.order:
291                f.write(f"{vertex.name}\n")
292
293    def exportGraph(self, output: str) -> None:
294        """Export graph as a dot file and pdf file."""
295        dot = graphviz.Digraph(comment='Graph Representation of Orderfile')
296
297        for (from_vertex, to_vertices) in self.graph.items():
298            for (to_vertex, weight) in to_vertices.items():
299                dot.edge(from_vertex.__str__(), to_vertex.__str__(), label=str(weight))
300
301        dot.render(filename=output)
302
303
304def parse_args() -> argparse.Namespace:
305    """Parses and returns command line arguments."""
306
307    parser = argparse.ArgumentParser(prog="merge_orderfile",
308                                    description="Merge Order Files")
309
310    parser.add_argument(
311        "--order-files",
312        required=True,
313        help="A collection of order files that need to be merged together."
314             "Format: A file-per-line file with @, a folder with ^, or comma separated values within a quotation."
315             "For example, you can say @file.txt, ^path/to/folder or '1.orderfile,2.orderfile'.")
316
317    parser.add_argument(
318        "--output",
319        default="default.orderfile",
320        help="Provide the output file name for the order file. Default Name: default.orderfile")
321
322    parser.add_argument(
323        "--graph-image",
324        help="Provide the output image name for the graph representation of the order files.")
325
326    return parser.parse_args()
327
328def removeCycles(graph: Graph) -> None:
329    # Remove cycles created by combining order files
330    for cycleList in graph.getCycles():
331        # Get the sum of in edge weights for all vertices in the cycle
332        # We exclude the cycle edges from the calculation
333        # For example, cycle = [a,b,c] where cycle_edges=[a->b, b->c, c->a]
334        # in_edges(a) = [main, c]
335        # in_edges(b) = [a]
336        # in_edges(c) = [b]
337        #
338        # Excluding cycle edges:
339        # in_edges(a) = [main] = 1
340        # in_edges(b) = [] = 0
341        # in_edges(c) = [] = 0
342        inner_edges = [graph.getInEdges(vertex.name) for vertex in cycleList]
343        inner_weights = []
344        for inner_edge in inner_edges:
345            total = 0
346            for edge in inner_edge:
347                if edge[0] not in cycleList:
348                    total += edge[1]
349            inner_weights.append(total)
350
351        # We remove the cycle edge that leads to the highest sum of in-edges for a vertex
352        # because the vertex has other options for ordering.
353        # In the above example, we remove c->a
354        max_inner_weight = max(inner_weights)
355        index = inner_weights.index(max_inner_weight)
356        prev = index - 1
357        if prev < 0:
358            prev = len(inner_weights) - 1
359        to_vertex = cycleList[index]
360        from_vertex = cycleList[prev]
361
362        graph.removeEdgeCompletely(from_vertex.name, to_vertex.name)
363
364def addSymbolsToGraph(graph: Graph, order: list[str], weight: int = 1) -> None:
365    prev_symbol = None
366    for symbol in order:
367        graph.addVertex(symbol)
368
369        if prev_symbol is not None:
370            for i in range(weight):
371                graph.addEdge(prev_symbol, symbol)
372
373        prev_symbol = symbol
374
375def createGraph(files: list[str]) -> Graph:
376    graph = Graph()
377
378    # Create graph representation based on combining the order files
379    for (orderfile, weight) in files:
380        with open(orderfile, "r", encoding="utf-8") as f:
381            lst = []
382            for line in f:
383                line = line.strip()
384                lst.append(line)
385
386            addSymbolsToGraph(graph, lst, weight)
387
388    return graph
389
390def main() -> None:
391    args = parse_args()
392
393    files = orderfile_utils.parse_merge_list(args.order_files)
394    graph = createGraph(files)
395
396    # Assert no cycles after removing them
397    removeCycles(graph)
398    assert(len(graph.getCycles()) == 0)
399
400    # Create an image of the graph representation
401    if args.graph_image:
402        graph.exportGraph(args.graph_image)
403
404    # Create order file from the graph structure
405    graph.printOrder(args.output)
406
407if __name__ == '__main__':
408    main()
409