1# coding=utf-8 2# 3# Copyright (c) 2025 Huawei Device Co., Ltd. 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16"""Tools for tracking resources.""" 17 18import logging 19import shutil 20import subprocess 21from collections.abc import Callable 22from dataclasses import dataclass, field 23from enum import Enum, auto 24from pathlib import Path 25from typing import Final 26 27 28class DeploymentMode(Enum): 29 DEV = auto() # Inside the Git repository (repo_root) 30 PKG = auto() # pip install ... (site-packages/taihe) 31 BUNDLE = auto() # Bundled with a python executable (taihe-pkg) 32 33 34# TODO: CLI: override and print the paths 35class ResourceType(Enum): 36 """Identifier of resources.""" 37 38 RUNTIME_SOURCE = "runtime-source" 39 RUNTIME_HEADER = "runtime-header" 40 STDLIB = "stdlib" 41 DOCUMENTATION = "doc" 42 43 # Things that should not be copied to packages should be prefixed with "DEV_" 44 DEV_PANDA_VM = "panda-vm" 45 DEV_ANTLR = "antlr" 46 DEV_PYTHON_BUILD = "python-build" 47 48 def is_packagable(self) -> bool: 49 return not self.name.startswith("DEV_") 50 51 52ResourceT = str | Path | Callable[["ResourceLocator"], Path] 53 54 55class CacheManager: 56 def __init__(self, root_dir: Path): 57 self.root_dir = Path(root_dir) 58 self.root_dir.mkdir(parents=True, exist_ok=True) 59 60 def get_cache_dir(self, cache_key: str) -> Path: 61 cache_dir = self.root_dir / cache_key 62 cache_dir.mkdir(exist_ok=True) 63 return cache_dir 64 65 def fetch_url( 66 self, 67 cache_key: str, 68 url: str, 69 filename: str = "", 70 curl_extra_args: list[str] | None = None, 71 force_download: bool = False, 72 ) -> Path: 73 """Fetches a simple URL to a local cache directory if it doesn't already exist. 74 75 Args: 76 url: The simple, file-based URL to fetch (e.g., "http://example.com/foo.zip") 77 cache_key: Cache directory key 78 filename: Optional custom filename for the downloaded file 79 curl_extra_args: Additional arguments to pass to curl 80 force_download: If True, download even if file already exists 81 82 Returns: 83 Path to the downloaded file 84 85 Raises: 86 ValueError: If URL or cache_dir is invalid 87 subprocess.CalledProcessError: If curl command fails 88 FileNotFoundError: If curl is not available 89 90 Example: 91 fetch_url("hxxp://example.com/foo.zip", cache_dir="bar") 92 --> Downloads to and returns "/path/to/root/.cache/bar/foo.zip" 93 94 fetch_url("hxxp://example.com/foo.zip", cache_dir="bar", filename="baz.zip") 95 --> Downloads to and returns "/path/to/root/.cache/bar/baz.zip" 96 """ 97 if not filename: 98 filename = url.split("/")[-1] 99 output_path = self.get_cache_dir(cache_key) / filename 100 101 # Check if file already exists and we're not forcing download 102 if output_path.exists() and not force_download: 103 logging.debug(f"Skip fetching, already exists: {output_path}") 104 return output_path 105 106 # Prepare curl args 107 curl_args = [ 108 "curl", 109 "--location", # Follow redirects 110 "--fail", # Fail on HTTP errors 111 "--progress-bar", 112 "--retry", 113 "5", # Retry for 5 times 114 "--output", 115 str(output_path), 116 *(curl_extra_args or []), 117 url, 118 ] 119 ok = False 120 try: 121 logging.info(f"Downloading {url} to {output_path}") 122 subprocess.run(curl_args, check=True) 123 logging.info(f"Successfully downloaded to {output_path}") 124 ok = True 125 return output_path 126 except subprocess.CalledProcessError as e: 127 logging.error(f"curl failed for {url}") 128 raise RuntimeError(f"Failed to download {url}") from e 129 except FileNotFoundError as e: 130 raise FileNotFoundError("curl command not found.") from e 131 finally: 132 if not ok: 133 output_path.unlink(missing_ok=True) 134 135 def fetch_git_repo( 136 self, cache_key: str, url: str, force_refresh: bool = False 137 ) -> Path: 138 """Clone or update a git repository.""" 139 repo_dir = self.get_cache_dir(cache_key) 140 141 def git(args: list[str]): 142 try: 143 subprocess.run(["git", *args], cwd=repo_dir, check=True) 144 except subprocess.CalledProcessError as e: 145 logging.error(f"Git command failed: {e}") 146 raise 147 148 if force_refresh and repo_dir.exists(): 149 shutil.rmtree(repo_dir) 150 repo_dir.mkdir() 151 152 if not (repo_dir / ".git").exists(): 153 logging.info(f"Downloading repo from {url}") 154 git(["clone", url, str(repo_dir)]) 155 return repo_dir 156 157 158@dataclass 159class ResourceLocator: 160 mode: DeploymentMode 161 root_dir: Path = field(default_factory=Path) 162 caches: CacheManager = field(init=False) 163 164 # Path means a overridden value. 165 # str means a pre-configured relative path. 166 _layout: dict[ResourceType, ResourceT] = field(init=False) 167 168 def __post_init__(self): 169 # Clone the configuration for later modification. 170 self._layout = dict(_MODE_TO_LAYOUT[self.mode]) 171 self.caches = CacheManager(self._get_cache_root()) 172 173 def _get_cache_root(self) -> Path: 174 cache_dirs = { 175 DeploymentMode.DEV: self.root_dir / ".cache", 176 DeploymentMode.PKG: Path("~/.cache/taihe").expanduser(), 177 DeploymentMode.BUNDLE: self.root_dir / "cache", 178 } 179 return cache_dirs[self.mode] 180 181 def get(self, t: ResourceType) -> Path: 182 descriptor = self._layout[t] 183 match descriptor: 184 case Path(): 185 return descriptor 186 case str(): 187 resolved = self.root_dir / descriptor 188 case Callable(): 189 resolved = descriptor(self) 190 self._layout[t] = resolved 191 return resolved 192 193 def override(self, t: ResourceType, p: Path): 194 self._layout[t] = p.resolve() 195 196 @classmethod 197 def detect(cls, file: str = __file__): 198 # The directory looks like: 199 # 7 6 5 4 3 2 1 0 200 # repo_root/ compiler/taihe/utils/resources.py 201 # .venv/lib/python3.12/site-packages/taihe/utils/resources.py 202 # taihe/lib/ pyrt/lib/python3.11/site-packages/taihe/utils/resources.py 203 # ^^^^ ^^^^^^^^^^^^^ 204 # python_runtime_dir repo_dir 205 # 206 # We use the heuristics based on the name of repository and python runtime. 207 DEPTH_REPO = 2 208 DEPTH_PYRT = 5 209 DEPTH_PKG_ROOT = 7 210 parents = Path(file).absolute().parents 211 212 def get(i: int) -> Path: 213 if i < len(parents): 214 return parents[i] 215 return Path() 216 217 repo_dir = get(DEPTH_REPO) 218 if repo_dir.name == "compiler": 219 return ResourceLocator(DeploymentMode.DEV, get(DEPTH_REPO + 1)) 220 221 if repo_dir.name == "site-packages": 222 if get(DEPTH_PYRT).name == BUNDLE_PYTHON_RUNTIME_DIR_NAME: 223 return ResourceLocator(DeploymentMode.BUNDLE, get(DEPTH_PKG_ROOT)) 224 else: 225 return ResourceLocator(DeploymentMode.PKG, get(DEPTH_REPO - 1)) 226 227 raise RuntimeError(f"cannot determine deployment layout ({repo_dir=})") 228 229 230BUNDLE_PYTHON_RUNTIME_DIR_NAME: Final = "pyrt" 231 232ANTLR_VERSION: Final = "4.13.2" 233ANTLR_MAVEN_REPO: Final = "https://mirrors.huaweicloud.com/repository/maven" 234 235PYTHON_REPO_URL: Final = "https://gitee.com/ASeaSalt/python-multi-platform.git" 236 237 238def _resolve_antlr(locator: ResourceLocator) -> Path: 239 url = f"{ANTLR_MAVEN_REPO}/org/antlr/antlr4/{ANTLR_VERSION}/antlr4-{ANTLR_VERSION}-complete.jar" 240 return locator.caches.fetch_url(cache_key="antlr", url=url) 241 242 243def _resolve_python_build(locator: ResourceLocator) -> Path: 244 return locator.caches.fetch_git_repo( 245 cache_key="python-packages", url=PYTHON_REPO_URL 246 ) 247 248 249_MODE_TO_LAYOUT: Final[dict[DeploymentMode, dict[ResourceType, ResourceT]]] = { 250 DeploymentMode.DEV: { 251 ResourceType.RUNTIME_SOURCE: "runtime/src", 252 ResourceType.RUNTIME_HEADER: "runtime/include", 253 ResourceType.STDLIB: "stdlib", 254 ResourceType.DOCUMENTATION: "cookbook", 255 ResourceType.DEV_PANDA_VM: ".panda_vm", 256 ResourceType.DEV_ANTLR: _resolve_antlr, 257 ResourceType.DEV_PYTHON_BUILD: _resolve_python_build, 258 }, 259 # Python packaging is not supported yet 260 DeploymentMode.PKG: {}, 261 DeploymentMode.BUNDLE: { 262 ResourceType.RUNTIME_SOURCE: "src/taihe/runtime", 263 ResourceType.RUNTIME_HEADER: "include", 264 ResourceType.STDLIB: "lib/taihe/stdlib", 265 ResourceType.DOCUMENTATION: "share/doc/taihe", 266 ResourceType.DEV_PANDA_VM: "var/lib/panda_vm", 267 }, 268}