• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# coding=utf-8
2#
3# Copyright (c) 2025 Huawei Device Co., Ltd.
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Tools for tracking resources."""
17
18import logging
19import shutil
20import subprocess
21from collections.abc import Callable
22from dataclasses import dataclass, field
23from enum import Enum, auto
24from pathlib import Path
25from typing import Final
26
27
class DeploymentMode(Enum):
    """The ways this package can be laid out on disk."""

    # Running from inside the Git repository (repo_root).
    DEV = auto()
    # Installed with pip (site-packages/taihe).
    PKG = auto()
    # Bundled together with a python executable (taihe-pkg).
    BUNDLE = auto()
32
33
34# TODO: CLI: override and print the paths
class ResourceType(Enum):
    """Names of the resources that can be looked up."""

    RUNTIME_SOURCE = "runtime-source"
    RUNTIME_HEADER = "runtime-header"
    STDLIB = "stdlib"
    DOCUMENTATION = "doc"

    # Members whose name starts with "DEV_" must not be copied into packages.
    DEV_PANDA_VM = "panda-vm"
    DEV_ANTLR = "antlr"
    DEV_PYTHON_BUILD = "python-build"

    def is_packagable(self) -> bool:
        """Return True unless the member name carries the "DEV_" prefix."""
        dev_prefix = "DEV_"
        is_dev_only = self.name[: len(dev_prefix)] == dev_prefix
        return not is_dev_only
50
51
# Descriptor of where a resource lives, as stored in a layout table:
#   - str:      a pre-configured path, relative to the locator's root directory;
#   - Path:     an overridden (or already resolved) location, used as-is;
#   - callable: called with the ResourceLocator to produce the path on demand.
ResourceT = str | Path | Callable[["ResourceLocator"], Path]
53
54
55class CacheManager:
56    def __init__(self, root_dir: Path):
57        self.root_dir = Path(root_dir)
58        self.root_dir.mkdir(parents=True, exist_ok=True)
59
60    def get_cache_dir(self, cache_key: str) -> Path:
61        cache_dir = self.root_dir / cache_key
62        cache_dir.mkdir(exist_ok=True)
63        return cache_dir
64
65    def fetch_url(
66        self,
67        cache_key: str,
68        url: str,
69        filename: str = "",
70        curl_extra_args: list[str] | None = None,
71        force_download: bool = False,
72    ) -> Path:
73        """Fetches a simple URL to a local cache directory if it doesn't already exist.
74
75        Args:
76            url: The simple, file-based URL to fetch (e.g., "http://example.com/foo.zip")
77            cache_key: Cache directory key
78            filename: Optional custom filename for the downloaded file
79            curl_extra_args: Additional arguments to pass to curl
80            force_download: If True, download even if file already exists
81
82        Returns:
83            Path to the downloaded file
84
85        Raises:
86            ValueError: If URL or cache_dir is invalid
87            subprocess.CalledProcessError: If curl command fails
88            FileNotFoundError: If curl is not available
89
90        Example:
91            fetch_url("hxxp://example.com/foo.zip", cache_dir="bar")
92            --> Downloads to and returns "/path/to/root/.cache/bar/foo.zip"
93
94            fetch_url("hxxp://example.com/foo.zip", cache_dir="bar", filename="baz.zip")
95            --> Downloads to and returns "/path/to/root/.cache/bar/baz.zip"
96        """
97        if not filename:
98            filename = url.split("/")[-1]
99        output_path = self.get_cache_dir(cache_key) / filename
100
101        # Check if file already exists and we're not forcing download
102        if output_path.exists() and not force_download:
103            logging.debug(f"Skip fetching, already exists: {output_path}")
104            return output_path
105
106        # Prepare curl args
107        curl_args = [
108            "curl",
109            "--location",  # Follow redirects
110            "--fail",  # Fail on HTTP errors
111            "--progress-bar",
112            "--retry",
113            "5",  # Retry for 5 times
114            "--output",
115            str(output_path),
116            *(curl_extra_args or []),
117            url,
118        ]
119        ok = False
120        try:
121            logging.info(f"Downloading {url} to {output_path}")
122            subprocess.run(curl_args, check=True)
123            logging.info(f"Successfully downloaded to {output_path}")
124            ok = True
125            return output_path
126        except subprocess.CalledProcessError as e:
127            logging.error(f"curl failed for {url}")
128            raise RuntimeError(f"Failed to download {url}") from e
129        except FileNotFoundError as e:
130            raise FileNotFoundError("curl command not found.") from e
131        finally:
132            if not ok:
133                output_path.unlink(missing_ok=True)
134
135    def fetch_git_repo(
136        self, cache_key: str, url: str, force_refresh: bool = False
137    ) -> Path:
138        """Clone or update a git repository."""
139        repo_dir = self.get_cache_dir(cache_key)
140
141        def git(args: list[str]):
142            try:
143                subprocess.run(["git", *args], cwd=repo_dir, check=True)
144            except subprocess.CalledProcessError as e:
145                logging.error(f"Git command failed: {e}")
146                raise
147
148        if force_refresh and repo_dir.exists():
149            shutil.rmtree(repo_dir)
150            repo_dir.mkdir()
151
152        if not (repo_dir / ".git").exists():
153            logging.info(f"Downloading repo from {url}")
154            git(["clone", url, str(repo_dir)])
155        return repo_dir
156
157
158@dataclass
159class ResourceLocator:
160    mode: DeploymentMode
161    root_dir: Path = field(default_factory=Path)
162    caches: CacheManager = field(init=False)
163
164    # Path means a overridden value.
165    # str means a pre-configured relative path.
166    _layout: dict[ResourceType, ResourceT] = field(init=False)
167
168    def __post_init__(self):
169        # Clone the configuration for later modification.
170        self._layout = dict(_MODE_TO_LAYOUT[self.mode])
171        self.caches = CacheManager(self._get_cache_root())
172
173    def _get_cache_root(self) -> Path:
174        cache_dirs = {
175            DeploymentMode.DEV: self.root_dir / ".cache",
176            DeploymentMode.PKG: Path("~/.cache/taihe").expanduser(),
177            DeploymentMode.BUNDLE: self.root_dir / "cache",
178        }
179        return cache_dirs[self.mode]
180
181    def get(self, t: ResourceType) -> Path:
182        descriptor = self._layout[t]
183        match descriptor:
184            case Path():
185                return descriptor
186            case str():
187                resolved = self.root_dir / descriptor
188            case Callable():
189                resolved = descriptor(self)
190        self._layout[t] = resolved
191        return resolved
192
193    def override(self, t: ResourceType, p: Path):
194        self._layout[t] = p.resolve()
195
196    @classmethod
197    def detect(cls, file: str = __file__):
198        # The directory looks like:
199        #   7    6     5   4      3            2         1     0
200        #                      repo_root/     compiler/taihe/utils/resources.py
201        #           .venv/lib/python3.12/site-packages/taihe/utils/resources.py
202        # taihe/lib/ pyrt/lib/python3.11/site-packages/taihe/utils/resources.py
203        #            ^^^^                ^^^^^^^^^^^^^
204        #     python_runtime_dir            repo_dir
205        #
206        # We use the heuristics based on the name of repository and python runtime.
207        DEPTH_REPO = 2
208        DEPTH_PYRT = 5
209        DEPTH_PKG_ROOT = 7
210        parents = Path(file).absolute().parents
211
212        def get(i: int) -> Path:
213            if i < len(parents):
214                return parents[i]
215            return Path()
216
217        repo_dir = get(DEPTH_REPO)
218        if repo_dir.name == "compiler":
219            return ResourceLocator(DeploymentMode.DEV, get(DEPTH_REPO + 1))
220
221        if repo_dir.name == "site-packages":
222            if get(DEPTH_PYRT).name == BUNDLE_PYTHON_RUNTIME_DIR_NAME:
223                return ResourceLocator(DeploymentMode.BUNDLE, get(DEPTH_PKG_ROOT))
224            else:
225                return ResourceLocator(DeploymentMode.PKG, get(DEPTH_REPO - 1))
226
227        raise RuntimeError(f"cannot determine deployment layout ({repo_dir=})")
228
229
# Directory name of the bundled Python runtime. `ResourceLocator.detect` compares
# an ancestor directory's name against it to recognise the BUNDLE deployment.
BUNDLE_PYTHON_RUNTIME_DIR_NAME: Final = "pyrt"

# ANTLR release and Maven repository base URL; together they form the download
# URL of the "complete" ANTLR jar.
ANTLR_VERSION: Final = "4.13.2"
ANTLR_MAVEN_REPO: Final = "https://mirrors.huaweicloud.com/repository/maven"

# Git repository cloned into the cache to resolve the python-build resource.
PYTHON_REPO_URL: Final = "https://gitee.com/ASeaSalt/python-multi-platform.git"
236
237
def _resolve_antlr(locator: ResourceLocator) -> Path:
    """Return the cached path of the ANTLR "complete" jar, downloading it if needed."""
    jar_name = f"antlr4-{ANTLR_VERSION}-complete.jar"
    jar_url = "/".join(
        [ANTLR_MAVEN_REPO, "org/antlr/antlr4", ANTLR_VERSION, jar_name]
    )
    cache = locator.caches
    return cache.fetch_url(cache_key="antlr", url=jar_url)
241
242
def _resolve_python_build(locator: ResourceLocator) -> Path:
    """Return the cached clone of the Python build repository, cloning it if needed."""
    cache = locator.caches
    repo_path = cache.fetch_git_repo(cache_key="python-packages", url=PYTHON_REPO_URL)
    return repo_path
247
248
# Default resource layout for each deployment mode. String values are paths
# relative to the locator's root directory; function values are called with the
# locator to produce the path on demand. `ResourceLocator` copies the table for
# its mode, so these defaults are not modified by per-instance overrides.
_MODE_TO_LAYOUT: Final[dict[DeploymentMode, dict[ResourceType, ResourceT]]] = {
    DeploymentMode.DEV: {
        ResourceType.RUNTIME_SOURCE: "runtime/src",
        ResourceType.RUNTIME_HEADER: "runtime/include",
        ResourceType.STDLIB: "stdlib",
        ResourceType.DOCUMENTATION: "cookbook",
        ResourceType.DEV_PANDA_VM: ".panda_vm",
        ResourceType.DEV_ANTLR: _resolve_antlr,
        ResourceType.DEV_PYTHON_BUILD: _resolve_python_build,
    },
    # Python packaging is not supported yet
    DeploymentMode.PKG: {},
    # The bundle has no entries for DEV_ANTLR and DEV_PYTHON_BUILD.
    DeploymentMode.BUNDLE: {
        ResourceType.RUNTIME_SOURCE: "src/taihe/runtime",
        ResourceType.RUNTIME_HEADER: "include",
        ResourceType.STDLIB: "lib/taihe/stdlib",
        ResourceType.DOCUMENTATION: "share/doc/taihe",
        ResourceType.DEV_PANDA_VM: "var/lib/panda_vm",
    },
}