• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) Meta Platforms, Inc. and affiliates.
2# All rights reserved.
3#
4# This source code is licensed under the BSD-style license found in the
5# LICENSE file in the root directory of this source tree.
6
7import gc
8import os
9import pickle
10import re
11import time
12import typing
13from collections import defaultdict
14from typing import Any, Dict, List, Optional, Set, Tuple, Union
15
16
def read_dump(prefix: str, filename: str) -> Dict[str, Union[str, int, List[Any]]]:
    """Load one pickled dump file and tag it with the rank taken from its name.

    The rank is whatever follows the first ``len(prefix)`` characters of the
    file's basename, parsed as an integer.

    Args:
        prefix: Leading part of the basename that precedes the rank digits.
        filename: Path of the pickled dump to read.

    Returns:
        A dict with ``host_name`` (``"host_rank<rank>"``), ``rank``, and the
        ``entries``, ``version`` and ``pg_config`` values from the dump.

    Raises:
        ValueError: If the text after the prefix is not an integer.
        KeyError: If the dump lacks ``entries``, ``version`` or ``pg_config``.

    Note:
        The file is read with ``pickle.load``, which can execute arbitrary
        code; only use this on dump files from a trusted source.
    """
    # Slice by prefix length (not by content) to isolate the rank digits.
    rank_text = os.path.basename(filename)[len(prefix) :]
    rank = int(rank_text)

    with open(filename, "rb") as dump_file:
        payload = pickle.load(dump_file)

    return {
        "host_name": f"host_rank{rank}",
        "rank": rank,
        "entries": payload["entries"],
        "version": payload["version"],
        "pg_config": payload["pg_config"],
    }
37
38
39exp = re.compile(r"([\w\-\_]*?)(\d+)$")
40
41
42def _determine_prefix(files: List[str]) -> str:
43    """If the user doesn't specify a prefix, but does pass a dir full of similarly-prefixed files, we should be able to
44    infer the common prefix most of the time.  But if we can't confidently infer, just fall back to requring the user
45    to specify it
46    """
47    possible_prefixes: typing.DefaultDict[str, Set[int]] = defaultdict(set)
48    for f in files:
49        m = exp.search(f)
50        if m:
51            p, r = m.groups()
52            possible_prefixes[p].add(int(r))
53    if len(possible_prefixes) == 1:
54        prefix = next(iter(possible_prefixes))
55        print(f"Inferred common prefix {prefix}")
56        return prefix
57    else:
58        raise ValueError(
59            "Unable to automatically determine the common prefix for the trace file names. "
60            "Please specify --prefix argument manually"
61        )
62
63
def read_dir(
    prefix: Optional[str], folder: str
) -> Tuple[Dict[str, Dict[str, Any]], str]:
    """Load every dump file under ``folder`` whose name starts with ``prefix``.

    The folder is walked recursively. When ``prefix`` is ``None`` it is
    inferred once, from the file names of the first directory visited, via
    ``_determine_prefix``. Each matching file is read with ``read_dump`` and
    stored under its bare file name, so files with the same name in different
    subdirectories overwrite each other.

    Args:
        prefix: Filename prefix preceding the rank digits, or ``None`` to
            infer it.
        folder: Root directory to search.

    Returns:
        ``(details, version)``: a mapping from file name to its loaded dump,
        and the ``version`` of the first dump with a truthy version (``""``
        if none was loaded). A folder with no matching files, or one that
        ``os.walk`` yields nothing for, returns ``({}, "")``.

    Raises:
        ValueError: If ``prefix`` is ``None`` and cannot be inferred.
    """
    # NOTE(review): the garbage collector is disabled and never re-enabled
    # here, presumably to speed up unpickling many objects. This stays in
    # effect for the whole process after the call; confirm callers expect it.
    gc.disable()
    details: Dict[str, Dict[str, Any]] = {}
    version = ""
    # Count the dumps actually read: the walk's `files` variable only holds
    # the last directory's listing and is unbound when the walk yields nothing.
    loaded = 0
    t0 = time.time()
    for root, _, files in os.walk(folder):
        if prefix is None:
            prefix = _determine_prefix(files)
        for f in files:
            if not f.startswith(prefix):
                continue
            details[f] = read_dump(prefix, os.path.join(root, f))
            loaded += 1
            if not version:
                version = str(details[f]["version"])
    tb = time.time()
    print(f"loaded {loaded} files in {tb - t0}s")
    return details, version
83