# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import gc
import os
import pickle
import re
import time
import typing
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set, Tuple, Union


def read_dump(prefix: str, filename: str) -> Dict[str, Union[str, int, List[Any]]]:
    """Load one pickled trace dump and tag it with the rank taken from its name.

    Args:
        prefix: Leading part of the file's basename; everything after it must
            parse as an integer rank.
        filename: Path of the pickled dump file.

    Returns:
        A dict with the keys ``host_name``, ``rank``, ``entries``, ``version``
        and ``pg_config``; the last three are copied from the unpickled dump.

    Raises:
        ValueError: If the part of the basename after ``prefix`` is not an
            integer.
        KeyError: If the dump lacks ``entries``, ``version`` or ``pg_config``.
    """
    basename = os.path.basename(filename)

    rank = int(basename[len(prefix) :])
    host_name = f"host_rank{rank}"

    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at dump files that come from a trusted source.
    with open(filename, "rb") as infile:
        dump = pickle.load(infile)

    entries = dump["entries"]
    version = dump["version"]
    pg_config = dump["pg_config"]

    return {
        "host_name": host_name,
        "rank": rank,
        "entries": entries,
        "version": version,
        "pg_config": pg_config,
    }


# Splits a name that ends in digits into (prefix, trailing number). The prefix
# group is lazy so the trailing-digit group captures the whole final number.
exp = re.compile(r"([\w\-\_]*?)(\d+)$")


def _determine_prefix(files: List[str]) -> str:
    """Infer the prefix shared by a set of trace file names.

    If the user doesn't specify a prefix, but does pass a dir full of
    similarly-prefixed files, we should be able to infer the common prefix
    most of the time. But if we can't confidently infer, just fall back to
    requiring the user to specify it.

    Args:
        files: File names to inspect; names that do not end in digits are
            ignored.

    Returns:
        The single prefix shared by every name that ends in digits.

    Raises:
        ValueError: If no name ends in digits, or more than one distinct
            prefix is found.
    """
    possible_prefixes: typing.DefaultDict[str, Set[int]] = defaultdict(set)
    for f in files:
        m = exp.search(f)
        if m:
            p, r = m.groups()
            possible_prefixes[p].add(int(r))
    if len(possible_prefixes) == 1:
        prefix = next(iter(possible_prefixes))
        print(f"Inferred common prefix {prefix}")
        return prefix
    else:
        raise ValueError(
            "Unable to automatically determine the common prefix for the trace file names. "
            "Please specify --prefix argument manually"
        )


def read_dir(
    prefix: Optional[str], folder: str
) -> Tuple[Dict[str, Dict[str, Any]], str]:
    """Load every trace dump under ``folder`` whose name starts with ``prefix``.

    The folder is walked recursively. When ``prefix`` is None it is inferred
    from the first directory that contains any files.

    Note: this disables the garbage collector for the whole process and does
    not re-enable it.

    Args:
        prefix: File-name prefix of the dumps, or None to infer it.
        folder: Root directory to search.

    Returns:
        ``(details, version)`` where ``details`` maps each loaded file's
        basename to the dict produced by ``read_dump`` and ``version`` is the
        stringified version of the first dump loaded (``""`` if none were).

    Raises:
        ValueError: If ``prefix`` is None and cannot be inferred, including
            when no files are found at all.
    """
    gc.disable()
    details: Dict[str, Dict[str, Any]] = {}
    t0 = time.time()
    version = ""
    # Count dumps actually loaded; the per-directory ``files`` list also holds
    # non-matching names and is undefined when the walk yields nothing.
    loaded = 0
    for root, _, files in os.walk(folder):
        if prefix is None:
            if not files:
                # Nothing to infer from a directory that only holds
                # sub-directories; keep walking.
                continue
            prefix = _determine_prefix(files)
        for f in files:
            if not f.startswith(prefix):
                continue
            details[f] = read_dump(prefix, os.path.join(root, f))
            loaded += 1
            if not version:
                version = str(details[f]["version"])
    if prefix is None:
        # No file was found anywhere; report it as a failed inference.
        _determine_prefix([])  # always raises ValueError
    tb = time.time()
    print(f"loaded {loaded} files in {tb - t0}s")
    return details, version