# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module to update packages from GitHub archive."""

import json
import re
import time
import urllib.request
import urllib.error
from typing import List, Optional, Tuple

import archive_utils
from base_updater import Updater
import git_utils
# pylint: disable=import-error
import metadata_pb2  # type: ignore
import updater_utils

# Matches release-asset and source-archive URLs on github.com. Group 1 is the
# repository owner and group 2 the repository name. The dots in the host name
# are escaped so that only the literal host "github.com" is accepted.
GITHUB_URL_PATTERN: str = (r'^https:\/\/github\.com\/([-\w]+)\/([-\w]+)\/' +
                           r'(releases\/download\/|archive\/)')
GITHUB_URL_RE: re.Pattern = re.compile(GITHUB_URL_PATTERN)


def _edit_distance(str1: str, str2: str) -> int:
    """Returns the edit distance between two strings.

    Insertions, deletions and substitutions each cost 1. Only two rows of
    the dynamic-programming table are kept at any time.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        The minimum number of single-character edits turning `str1` into
        `str2`.
    """
    # prev[j] is the distance between the processed prefix of str1 and
    # str2[:j]; for the empty prefix that is simply j insertions.
    prev = list(range(0, len(str2) + 1))
    for i, chr1 in enumerate(str1):
        # Turning str1[:i + 1] into the empty string takes i + 1 deletions.
        cur = [i + 1]
        for j, chr2 in enumerate(str2):
            if chr1 == chr2:
                cur.append(prev[j])
            else:
                cur.append(min(prev[j + 1], prev[j], cur[j]) + 1)
        prev = cur
    return prev[len(str2)]


def choose_best_url(urls: List[str], previous_url: str) -> str:
    """Returns the best url to download from a list of candidate urls.

    This function calculates the similarity between the previous url and each
    of the new urls, and returns the one that best matches the previous url.

    Similarity is measured by edit distance.

    Args:
        urls: Array of candidate urls.
        previous_url: String of the url used previously.

    Returns:
        One url from `urls`, or an empty string if `urls` is empty. When
        several candidates are equally close, the first one wins.
    """
    return min(urls,
               default="",
               key=lambda url: _edit_distance(url, previous_url))


class GithubArchiveUpdater(Updater):
    """Updater for archives from GitHub.

    This updater supports release archives in GitHub. Version is determined by
    release name in GitHub.
    """

    # Key of the GitHub release JSON object that holds the version string.
    VERSION_FIELD: str = 'tag_name'
    # Repository owner and name; set by is_supported_url() on a match.
    owner: str
    repo: str

    def is_supported_url(self) -> bool:
        """Checks whether the old URL is a GitHub archive URL.

        On success, `self.owner` and `self.repo` are set from the URL.

        Returns:
            True if the old URL has type ARCHIVE and matches GITHUB_URL_RE,
            False otherwise.
        """
        if self._old_url.type != metadata_pb2.URL.ARCHIVE:
            return False
        match = GITHUB_URL_RE.match(self._old_url.value)
        if match is None:
            return False
        # Groups 1 and 2 always exist once the pattern has matched.
        self.owner, self.repo = match.group(1, 2)
        return True

    def _fetch_latest_release(self) -> Optional[Tuple[str, List[str]]]:
        """Queries the GitHub API for the latest release of the repository.

        Returns:
            A tuple of the release's VERSION_FIELD value and the download URLs
            of its assets accepted by archive_utils.is_supported_archive, or
            None if the API answers with HTTP 404.

        Raises:
            urllib.error.HTTPError: For any HTTP error other than 404.
        """
        # pylint: disable=line-too-long
        url = f'https://api.github.com/repos/{self.owner}/{self.repo}/releases/latest'
        try:
            with urllib.request.urlopen(url) as response:
                data = json.loads(response.read().decode())
        except urllib.error.HTTPError as err:
            if err.code == 404:
                return None
            raise
        supported_assets = [
            a['browser_download_url'] for a in data['assets']
            if archive_utils.is_supported_archive(a['browser_download_url'])
        ]
        return (data[self.VERSION_FIELD], supported_assets)

    def _fetch_latest_tag(self) -> Tuple[str, List[str]]:
        """Collects the repository's tag names and picks the latest version.

        Returns:
            A tuple of the version chosen by updater_utils.get_latest_version
            from the collected tag names, and an empty list of asset URLs.
        """
        tags: List[str] = []
        # fetches at most 20 pages.
        for page in range(1, 21):
            # Sleeps 10s to avoid rate limit.
            time.sleep(10)
            # pylint: disable=line-too-long
            url = f'https://api.github.com/repos/{self.owner}/{self.repo}/tags?page={page}'
            with urllib.request.urlopen(url) as response:
                data = json.loads(response.read().decode())
            # An empty page means there are no more tags to read.
            if not data:
                break
            tags.extend(d['name'] for d in data)
        return (updater_utils.get_latest_version(self._old_ver, tags), [])

    def _fetch_latest_version(self) -> None:
        """Checks upstream and gets the latest release tag.

        Uses the latest release when one is found and falls back to the tag
        list otherwise. Sets `self._new_ver` and `self._new_url.value`.
        """
        self._new_ver, urls = (self._fetch_latest_release()
                               or self._fetch_latest_tag())

        # Adds source code urls.
        archive_base = (f'https://github.com/{self.owner}/{self.repo}'
                        f'/archive/{self._new_ver}')
        urls.append(f'{archive_base}.tar.gz')
        urls.append(f'{archive_base}.zip')

        # Prefer the candidate that looks most like the URL used previously.
        self._new_url.value = choose_best_url(urls, self._old_url.value)

    def _fetch_latest_commit(self) -> None:
        """Checks upstream and gets the latest commit to master.

        Sets `self._new_ver` to the commit SHA and `self._new_url.value` to
        the zip archive URL of that commit.
        """
        # NOTE(review): the branch name is hard-coded to "master"; confirm
        # how repositories with a different default branch should be handled.
        # pylint: disable=line-too-long
        url = f'https://api.github.com/repos/{self.owner}/{self.repo}/commits/master'
        with urllib.request.urlopen(url) as response:
            data = json.loads(response.read().decode())
        self._new_ver = data['sha']
        self._new_url.value = (
            # pylint: disable=line-too-long
            f'https://github.com/{self.owner}/{self.repo}/archive/{self._new_ver}.zip'
        )

    def check(self) -> None:
        """Checks update for package.

        Stores the result in `self._new_ver` and `self._new_url`; nothing is
        returned. The latest commit is looked up when git_utils.is_commit
        accepts the old version, the latest release or tag otherwise.
        """
        if git_utils.is_commit(self._old_ver):
            self._fetch_latest_commit()
        else:
            self._fetch_latest_version()

    def update(self, skip_post_update: bool) -> None:
        """Updates the package.

        Has to call check() before this function.

        Args:
            skip_post_update: Not used by this implementation.
        """
        try:
            temporary_dir = archive_utils.download_and_extract(
                self._new_url.value)
            package_dir = archive_utils.find_archive_root(temporary_dir)
            updater_utils.replace_package(package_dir, self._proj_path)
        finally:
            # The temporary directory is deliberately not removed, otherwise
            # it would be impossible to debug a failure.
            urllib.request.urlcleanup()