# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module to update packages from GitHub archive."""

import json
import os
import re
import urllib.request
import urllib.error
from pathlib import Path
from typing import List, Optional, Tuple

import archive_utils
from base_updater import Updater
import git_utils
# pylint: disable=import-error
import updater_utils

# Matches GitHub release-asset and source-archive URLs. Group 1 is the owner
# and group 2 is the repository. The dot in the host name is escaped so that
# it only matches a literal "." rather than any character.
GITHUB_URL_PATTERN: str = (r'^https:\/\/github\.com\/([-\w]+)\/([-\w]+)\/' +
                           r'(releases\/download\/|archive\/)')
GITHUB_URL_RE: re.Pattern = re.compile(GITHUB_URL_PATTERN)


def _edit_distance(str1: str, str2: str) -> int:
    """Returns the edit distance between two strings.

    Insertions, deletions and substitutions each cost 1. Only two rows of
    the dynamic-programming table are kept at any time.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        The minimum number of single-character edits turning str1 into str2.
    """
    # prev[j] is the distance between the processed prefix of str1 and
    # str2[:j]; initially the prefix is empty, so the distance is j.
    prev = list(range(0, len(str2) + 1))
    for i, chr1 in enumerate(str1):
        # Turning str1[:i + 1] into the empty string takes i + 1 deletions.
        cur = [i + 1]
        for j, chr2 in enumerate(str2):
            if chr1 == chr2:
                cur.append(prev[j])
            else:
                cur.append(min(prev[j + 1], prev[j], cur[j]) + 1)
        prev = cur
    return prev[len(str2)]


def choose_best_url(urls: List[str], previous_url: str) -> str:
    """Returns the best url to download from a list of candidate urls.

    This function calculates the similarity between the previous url and
    each of the new urls, and returns the one that best matches the previous
    url.

    Similarity is measured by edit distance.

    Args:
        urls: Array of candidate urls.
        previous_url: String of the url used previously.

    Returns:
        One url from `urls`, or an empty string if `urls` is empty.
    """
    return min(urls,
               default="",
               key=lambda url: _edit_distance(url, previous_url))


class GithubArchiveUpdater(Updater):
    """Updater for archives from GitHub.

    This updater supports release archives in GitHub. Version is determined by
    release name in GitHub.
    """

    UPSTREAM_REMOTE_NAME: str = "update_origin"
    VERSION_FIELD: str = 'tag_name'
    # Both are filled in by is_supported_url() from the old identifier's URL.
    owner: str
    repo: str

    def is_supported_url(self) -> bool:
        """Checks whether the old identifier is a GitHub archive URL.

        On success the owner and repository parsed from the URL are stored in
        `self.owner` and `self.repo`.

        Returns:
            True if the identifier type is 'archive' (case-insensitive) and
            its value matches GITHUB_URL_RE, False otherwise.
        """
        if self._old_identifier.type.lower() != 'archive':
            return False
        match = GITHUB_URL_RE.match(self._old_identifier.value)
        if match is None:
            return False
        # Groups 1 and 2 always exist once the pattern has matched.
        self.owner, self.repo = match.group(1, 2)
        return True

    def _fetch_latest_release(self) -> Optional[Tuple[str, List[str]]]:
        """Queries the GitHub API for the latest release of the repository.

        Returns:
            A tuple of the release's VERSION_FIELD value and the download
            urls of the assets that archive_utils reports as supported
            archives, or None if the API responds with HTTP 404.

        Raises:
            urllib.error.HTTPError: For any HTTP error other than 404.
        """
        # pylint: disable=line-too-long
        url = f'https://api.github.com/repos/{self.owner}/{self.repo}/releases/latest'
        try:
            with urllib.request.urlopen(url) as request:
                data = json.loads(request.read().decode())
        except urllib.error.HTTPError as err:
            if err.code == 404:
                return None
            raise
        supported_assets = [
            a['browser_download_url'] for a in data['assets']
            if archive_utils.is_supported_archive(a['browser_download_url'])
        ]
        return data[self.VERSION_FIELD], supported_assets

    def setup_remote(self) -> None:
        """Points the upstream remote at the GitHub project and fetches it.

        If a remote named UPSTREAM_REMOTE_NAME exists with a different url it
        is removed and re-added; if it does not exist it is added.
        """
        homepage = f'https://github.com/{self.owner}/{self.repo}'
        remotes = git_utils.list_remotes(self._proj_path)
        current_remote_url = remotes.get(self.UPSTREAM_REMOTE_NAME)

        if current_remote_url is not None and current_remote_url != homepage:
            git_utils.remove_remote(self._proj_path, self.UPSTREAM_REMOTE_NAME)
            current_remote_url = None

        if current_remote_url is None:
            git_utils.add_remote(self._proj_path, self.UPSTREAM_REMOTE_NAME,
                                 homepage)

        git_utils.fetch(self._proj_path, self.UPSTREAM_REMOTE_NAME)

    def create_tar_gz_url(self) -> str:
        """Returns the .tar.gz source archive url for the new version."""
        url = f'https://github.com/{self.owner}/{self.repo}/archive/' \
              f'{self._new_identifier.version}.tar.gz'
        return url

    def create_zip_url(self) -> str:
        """Returns the .zip source archive url for the new version."""
        url = f'https://github.com/{self.owner}/{self.repo}/archive/' \
              f'{self._new_identifier.version}.zip'
        return url

    def _fetch_latest_tag(self) -> Tuple[str, List[str]]:
        """Picks the latest stable release tag from the upstream remote.

        We want to avoid hitting GitHub API rate limit by using alternative
        solutions, so the tags are read from the git remote instead.

        Returns:
            A tuple of the chosen tag and an empty list of asset urls.
        """
        tags = git_utils.list_remote_tags(self._proj_path,
                                          self.UPSTREAM_REMOTE_NAME)
        parsed_tags = [updater_utils.parse_remote_tag(tag) for tag in tags]
        tag = updater_utils.get_latest_stable_release_tag(
            self._old_identifier.version, parsed_tags)
        return tag, []

    def _fetch_latest_tag_or_release(self) -> None:
        """Checks upstream and gets the latest release tag.

        The latest release is used when the API returns one; otherwise the
        latest tag from the git remote is used. The new identifier's version
        and value (download url) are updated in place.
        """
        self._new_identifier.version, urls = (self._fetch_latest_release()
                                              or self._fetch_latest_tag())

        # Adds source code urls.
        urls.append(self.create_tar_gz_url())
        urls.append(self.create_zip_url())

        self._new_identifier.value = choose_best_url(
            urls, self._old_identifier.value)

    def _fetch_latest_commit(self) -> None:
        """Checks upstream and gets the latest commit to default branch."""
        branch = git_utils.detect_default_branch(self._proj_path,
                                                 self.UPSTREAM_REMOTE_NAME)
        self._new_identifier.version = git_utils.get_sha_for_branch(
            self._proj_path, self.UPSTREAM_REMOTE_NAME + '/' + branch)
        # Same zip archive url as for tagged versions, built from the sha
        # that was just stored as the new version.
        self._new_identifier.value = self.create_zip_url()

    def set_custom_version(self, custom_version: str) -> None:
        """Sets a caller-chosen version and the matching archive url.

        Args:
            custom_version: Version passed on to the base class
                implementation.
        """
        super().set_custom_version(custom_version)
        tar_gz_url = self.create_tar_gz_url()
        zip_url = self.create_zip_url()
        self._new_identifier.value = choose_best_url(
            [tar_gz_url, zip_url], self._old_identifier.value)

    def check(self) -> None:
        """Checks update for package.

        Sets up the upstream remote and then stores the latest upstream
        version and download url in the new identifier. When the old version
        is a commit, the latest commit of the default branch is used;
        otherwise the latest release or tag is used. Nothing is returned.
        """
        self.setup_remote()

        if git_utils.is_commit(self._old_identifier.version):
            self._fetch_latest_commit()
        else:
            self._fetch_latest_tag_or_release()

    def update(self) -> Path:
        """Updates the package.

        Has to call check() before this function.

        Returns:
            The normalized path of the extracted archive root.
        """
        temporary_dir = None
        try:
            temporary_dir = archive_utils.download_and_extract(
                self._new_identifier.value)
            package_dir = archive_utils.find_archive_root(temporary_dir)
            updater_utils.replace_package(package_dir, self._proj_path)
            # package_dir contains the old version of the project. This is
            # returned in case a project needs a post_update.sh script.
            # NOTE(review): os.path.normpath() returns a str, not a Path,
            # despite the annotation -- confirm what callers expect before
            # changing either side.
            return os.path.normpath(package_dir)
        finally:
            # Don't remove the temporary directory, or it'll be impossible
            # to debug the failure...
            # shutil.rmtree(temporary_dir, ignore_errors=True)
            urllib.request.urlcleanup()