#!/usr/bin/env python
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""deapexer is a tool that prints out content of an APEX.

To print content of an APEX to stdout:
  deapexer list foo.apex

To extract content of an APEX to the given directory:
  deapexer extract foo.apex dest
"""
from __future__ import print_function

import argparse
import enum
import os
import re
import shutil
import sys
import subprocess
import tempfile
import zipfile

BLOCK_SIZE = 4096

# See apexd/apex_file.cpp#RetrieveFsType
# Each tuple is (filesystem name, byte offset of the magic, magic bytes).
FS_TYPES = [
    ('f2fs', 1024, b'\x10\x20\xf5\xf2'),
    ('ext4', 1024 + 0x38, b'\123\357'),
    ('erofs', 1024, b'\xe2\xe1\xf5\xe0'),
]


def RetrieveFileSystemType(file):
  """Returns filesystem type with magic.

  Args:
    file: path to the payload image to inspect.

  Returns:
    The name of the first entry of FS_TYPES whose magic bytes are found at the
    expected offset of the image.

  Raises:
    ValueError: if none of the known magics matches.
  """
  with open(file, 'rb') as f:
    for fs_type, offset, magic in FS_TYPES:
      # A short read leaves the tail of `buf` zeroed, which simply fails to
      # match the magic.
      buf = bytearray(len(magic))
      f.seek(offset, os.SEEK_SET)
      f.readinto(buf)
      if buf == magic:
        return fs_type
  raise ValueError('Failed to retrieve filesystem type')


class ApexImageEntry(object):
  """Represents an entry in APEX payload.

  All attributes are exposed read-only through properties. `entries` holds the
  children of a directory entry and is filled in by Apex.read_dir().
  """

  def __init__(self, name, *, base_dir, permissions, size, ino, extents,
               is_directory, is_symlink, security_context):
    self._name = name
    self._base_dir = base_dir
    self._permissions = permissions
    self._size = size
    self._is_directory = is_directory
    self._is_symlink = is_symlink
    self._ino = ino
    self._extents = extents
    self._security_context = security_context
    self._entries = []

  @property
  def name(self):
    return self._name

  @property
  def root(self):
    """True only for the '.' entry of the top-level directory './'."""
    return self._base_dir == './' and self._name == '.'

  @property
  def full_path(self):
    """Path of the entry inside the payload; directories end with '/'."""
    if self.root:
      return self._base_dir  # './'
    path = os.path.join(self._base_dir, self._name)
    if self.is_directory:
      path += '/'
    return path

  @property
  def is_directory(self):
    return self._is_directory

  @property
  def is_symlink(self):
    return self._is_symlink

  @property
  def is_regular_file(self):
    return not self.is_directory and not self.is_symlink

  @property
  def permissions(self):
    return self._permissions

  @property
  def size(self):
    return self._size

  @property
  def ino(self):
    return self._ino

  @property
  def entries(self):
    return self._entries

  @property
  def extents(self):
    return self._extents

  @property
  def security_context(self):
    return self._security_context

  def __str__(self):
    """Returns an `ls -l`-like line: type and mode bits, size and name."""
    if self._is_directory:
      ret = 'd'
    elif self._is_symlink:
      ret = 'l'
    else:
      ret = '-'

    def MaskAsString(m):
      mask = 'r' if m & 4 == 4 else '-'
      mask += 'w' if m & 2 == 2 else '-'
      mask += 'x' if m & 1 == 1 else '-'
      return mask

    ret += MaskAsString(self._permissions >> 6)
    ret += MaskAsString((self._permissions >> 3) & 7)
    ret += MaskAsString(self._permissions & 7)

    # str() so that an integer size does not break the concatenation.
    return ret + ' ' + str(self._size) + ' ' + self._name


class Apex(object):
  """Represents an APEX file.

  The payload image is extracted to a temporary directory on construction.
  Use the object as a context manager so that the directory is removed when
  the `with` block exits; __del__ only acts as a fallback.
  """

  def __init__(self, args):
    """Extracts apex_payload.img from args.apex and detects its filesystem.

    Args:
      args: object providing `debugfs_path`, `fsckerofs_path` and `apex`.
    """
    self._debugfs = args.debugfs_path
    self._fsckerofs = args.fsckerofs_path
    self._apex = args.apex
    self._tempdir = tempfile.mkdtemp()
    try:
      with zipfile.ZipFile(self._apex, 'r') as zip_ref:
        self._payload = zip_ref.extract('apex_payload.img', path=self._tempdir)
      self._payload_fs_type = RetrieveFileSystemType(self._payload)
    except BaseException:
      # Do not leave the temporary directory behind when construction fails.
      self._cleanup()
      raise

  def _cleanup(self):
    """Removes the temporary directory; safe to call more than once."""
    # getattr: __del__ may run on an instance whose __init__ failed before
    # _tempdir was assigned.
    tempdir = getattr(self, '_tempdir', None)
    if tempdir is not None:
      self._tempdir = None
      shutil.rmtree(tempdir, ignore_errors=True)

  def __del__(self):
    self._cleanup()

  def __enter__(self):
    return self

  def __exit__(self, ex_type, value, traceback):
    self._cleanup()

  def list(self):
    """Yields every entry of the payload; only ext4 payloads are supported."""
    if self._payload_fs_type not in ['ext4']:
      sys.exit(f'{self._payload_fs_type} is not supported for `list`.')

    yield from self.entries()

  def _run_debugfs(self, request):
    """Runs a single debugfs request against the payload, returns its stdout."""
    return subprocess.check_output([self._debugfs, '-R', request, self._payload],
                                   text=True, stderr=subprocess.DEVNULL)

  def _read_extents(self, ino, size):
    """Returns the [(offset, length)] byte ranges of a regular file.

    Returns [] when the location of the file cannot be retrieved.
    """
    stdout = self._run_debugfs(f'dump_extents <{ino}>')
    # Output of dump_extents for an inode fragmented in 3 blocks (length and addresses represent
    # block-sized sections):
    # Level Entries       Logical      Physical Length Flags
    # 0/ 0   1/  3     0 -     0    18 -    18      1
    # 0/ 0   2/  3     1 -    15    20 -    34     15
    # 0/ 0   3/  3    16 -  1863    37 -  1884   1848
    rows = stdout.splitlines()
    left_length = int(size)
    extents = []
    try:  # dump_extents sometimes has an unexpected output
      rows.pop(0)  # the first line contains only columns names
      for row in rows:
        tokens = row.split()
        offset = int(tokens[7]) * BLOCK_SIZE
        length = min(int(tokens[-1]) * BLOCK_SIZE, left_length)
        left_length -= length
        extents.append((offset, length))
      if left_length != 0:  # dump_extents sometimes fails to display "hole" blocks
        raise ValueError
    except (IndexError, ValueError):
      return []  # [] means that we failed to retrieve the file location successfully
    return extents

  def _read_security_context(self, entry_path):
    """Returns the 'security.selinux' attribute of the entry at entry_path."""
    stdout = self._run_debugfs(f'ea_get -V {entry_path} security.selinux')
    return stdout.rstrip('\n\x00')

  def read_dir(self, path) -> ApexImageEntry:
    """Recursively reads the directory at `path` of an ext4 payload.

    Args:
      path: directory path inside the payload; must end with '/'.

    Returns:
      The entry for the directory itself ('.'), whose `entries` holds its
      children sorted by name. Sub-directories are read recursively.
    """
    assert path.endswith('/')
    assert self.payload_fs_type == 'ext4'

    listing = self._run_debugfs(f'ls -l -p {path}')
    dir_entry = None
    entries = []
    for line in listing.split('\n'):
      if not line:
        continue
      # The parsing below expects lines of the form
      # /<ino>/<mode>/<field>/<field>/<name>/<size>/ and skips anything else.
      parts = line.split('/')
      if len(parts) != 8:
        continue
      name = parts[5]
      if not name:
        continue
      if name == '..':
        continue
      if name == 'lost+found' and path == './':
        continue
      ino = parts[1]
      bits = parts[2]
      size = parts[6]
      is_symlink = bits[1] == '2'
      is_directory = bits[1] == '4'

      extents = []
      if not is_symlink and not is_directory:
        extents = self._read_extents(ino, size)

      # get 'security.selinux' attribute
      security_context = self._read_security_context(os.path.join(path, name))

      entry = ApexImageEntry(name,
                             base_dir=path,
                             permissions=int(bits[3:], 8),
                             size=size,
                             is_directory=is_directory,
                             is_symlink=is_symlink,
                             ino=ino,
                             extents=extents,
                             security_context=security_context)
      if name == '.':
        dir_entry = entry
      elif is_directory:
        sub_dir_entry = self.read_dir(path + name + '/')
        # sub_dir_entry should be the same inode
        assert entry.ino == sub_dir_entry.ino
        entry.entries.extend(sub_dir_entry.entries)
        entries.append(entry)
      else:
        entries.append(entry)

    assert dir_entry
    dir_entry.entries.extend(sorted(entries, key=lambda e: e.name))
    return dir_entry

  def extract(self, dest):
    """Recursively dumps contents of the payload with retaining mode bits, but not owner/group"""
    if self._payload_fs_type == 'erofs':
      subprocess.run([self._fsckerofs, f'--extract={dest}', '--overwrite',
                      '--no-preserve-owner', self._payload],
                     stdout=subprocess.DEVNULL, check=True)
    elif self._payload_fs_type == 'ext4':
      # Extract entries one by one using `dump` because `rdump` doesn't support
      # "no-preserve" mode
      for entry in self.entries():
        self.write_entry(entry, dest)
    else:
      # TODO(b/279688635) f2fs is not supported yet.
      sys.exit(f'{self._payload_fs_type} is not supported for `extract`.')

  @property
  def payload_fs_type(self) -> str:
    return self._payload_fs_type

  def entries(self):
    """Generator to visit all entries in the payload starting from root(./)"""

    def TopDown(entry):
      yield entry
      for child in entry.entries:
        yield from TopDown(child)

    root = self.read_dir('./')
    yield from TopDown(root)

  def read_symlink(self, entry):
    """Returns the target of the symlink `entry` of an ext4 payload."""
    assert entry.is_symlink
    assert self.payload_fs_type == 'ext4'

    stdout = self._run_debugfs(f'stat {entry.full_path}')
    # Output of stat for a symlink should have the following line:
    #   Fast link dest: \"%.*s\"
    m = re.search(r'\bFast link dest: \"(.+)\"\n', stdout)
    if m:
      return m.group(1)

    # if above match fails, it means it's a slow link. Use cat.
    output = self._run_debugfs(f'cat {entry.full_path}')

    if not output:
      sys.exit('failed to read symlink target')
    return output

  def write_entry(self, entry, out_dir):
    """Materializes `entry` under out_dir as a directory, symlink or file."""
    dest = os.path.normpath(os.path.join(out_dir, entry.full_path))
    if entry.is_directory:
      if not os.path.exists(dest):
        os.makedirs(dest, mode=0o755)
    elif entry.is_symlink:
      os.symlink(self.read_symlink(entry), dest)
    else:
      self._run_debugfs(f'dump {entry.full_path} {dest}')
      # retain mode bits
      os.chmod(dest, entry.permissions)


def _RunOnDecompressedApex(args, run):
  """Runs `run(args)` on the decompressed APEX if args.apex is compressed.

  args.apex is rewritten to point at the decompressed copy, which lives in a
  temporary directory that is removed once `run` returns.

  Returns:
    True if args.apex was compressed and `run` has been called, else False.
  """
  if GetType(args.apex) != ApexType.COMPRESSED:
    return False
  with tempfile.TemporaryDirectory() as temp:
    decompressed_apex = os.path.join(temp, 'temp.apex')
    Decompress(args.apex, decompressed_apex)
    args.apex = decompressed_apex

    run(args)
  return True


def RunList(args):
  """Handles `list`: prints one line per entry of the APEX payload."""
  if _RunOnDecompressedApex(args, RunList):
    return

  with Apex(args) as apex:
    for e in apex.list():
      # dot(., ..) directories
      if not e.root and e.name in ('.', '..'):
        continue
      res = ''
      if args.size:
        res += str(e.size) + ' '
      res += e.full_path
      if args.extents:
        res += ' [' + '-'.join(str(x) for x in e.extents) + ']'
      if args.contexts:
        res += ' ' + e.security_context
      print(res)


def RunExtract(args):
  """Handles `extract`: dumps the APEX payload into args.dest."""
  if _RunOnDecompressedApex(args, RunExtract):
    return

  with Apex(args) as apex:
    if not os.path.exists(args.dest):
      os.makedirs(args.dest, mode=0o755)
    apex.extract(args.dest)
    if os.path.isdir(os.path.join(args.dest, 'lost+found')):
      shutil.rmtree(os.path.join(args.dest, 'lost+found'))


class ApexType(enum.Enum):
  INVALID = 0
  UNCOMPRESSED = 1
  COMPRESSED = 2


def GetType(apex_path):
  """Classifies the APEX at apex_path by the members of its zip container.

  A file with 'apex_payload.img' is UNCOMPRESSED, one with 'original_apex' is
  COMPRESSED, and one with both or neither is INVALID.
  """
  with zipfile.ZipFile(apex_path, 'r') as zip_file:
    names = zip_file.namelist()
    has_payload = 'apex_payload.img' in names
    has_original_apex = 'original_apex' in names
    if has_payload and has_original_apex:
      return ApexType.INVALID
    if has_payload:
      return ApexType.UNCOMPRESSED
    if has_original_apex:
      return ApexType.COMPRESSED
    return ApexType.INVALID


def RunInfo(args):
  """Handles `info`: prints the APEX type, payload fs type or manifest."""
  if args.print_type:
    res = GetType(args.apex)
    if res == ApexType.INVALID:
      print(args.apex + ' is not a valid apex')
      sys.exit(1)
    print(res.name)
  elif args.print_payload_type:
    with Apex(args) as apex:
      print(apex.payload_fs_type)
  else:
    # Imported here because only this branch needs the manifest module.
    import apex_manifest  # pylint: disable=import-outside-toplevel
    manifest = apex_manifest.fromApex(args.apex)
    print(apex_manifest.toJsonString(manifest))


def RunDecompress(args):
  """RunDecompress takes path to compressed APEX and decompresses it to
  produce the original uncompressed APEX at given output path

  See apex_compression_tool.py#RunCompress for details on compressed APEX
  structure.

  Args:
      args.input: file path to compressed APEX
      args.output: file path to where decompressed APEX will be placed
      args.copy_if_uncompressed: if set, an uncompressed input is copied to
        args.output as is
  """
  if GetType(args.input) == ApexType.UNCOMPRESSED and args.copy_if_uncompressed:
    shutil.copyfile(args.input, args.output)
    return

  compressed_apex_fp = args.input
  decompressed_apex_fp = args.output
  return Decompress(compressed_apex_fp, decompressed_apex_fp)


def Decompress(compressed_apex_fp, decompressed_apex_fp):
  """Extracts 'original_apex' of a compressed APEX to decompressed_apex_fp.

  Exits the process with status 1 if the output path already exists or if the
  input has no 'original_apex' member.
  """
  if os.path.exists(decompressed_apex_fp):
    print("Output path '" + decompressed_apex_fp + "' already exists")
    sys.exit(1)

  with zipfile.ZipFile(compressed_apex_fp, 'r') as zip_obj:
    if 'original_apex' not in zip_obj.namelist():
      print(compressed_apex_fp + ' is not a compressed APEX. Missing '
            "'original_apex' file inside it.")
      sys.exit(1)
    # Rename original_apex file to what user provided as output filename
    original_apex_info = zip_obj.getinfo('original_apex')
    original_apex_info.filename = os.path.basename(decompressed_apex_fp)
    # Extract the original_apex as desired name
    zip_obj.extract(original_apex_info,
                    path=os.path.dirname(decompressed_apex_fp))


def main(argv):
  """Parses argv and dispatches to the handler of the chosen sub-command."""
  parser = argparse.ArgumentParser()

  debugfs_default = None
  fsckerofs_default = None
  if 'ANDROID_HOST_OUT' in os.environ:
    debugfs_default = os.path.join(os.environ['ANDROID_HOST_OUT'], 'bin/debugfs_static')
    fsckerofs_default = os.path.join(os.environ['ANDROID_HOST_OUT'], 'bin/fsck.erofs')
  parser.add_argument(
      '--debugfs_path', help='The path to debugfs binary', default=debugfs_default)
  parser.add_argument(
      '--fsckerofs_path', help='The path to fsck.erofs binary', default=fsckerofs_default)
  # TODO(b/279858383) remove the argument
  parser.add_argument('--blkid_path', help='NOT USED')

  subparsers = parser.add_subparsers(required=True, dest='cmd')

  parser_list = subparsers.add_parser(
      'list', help='prints content of an APEX to stdout')
  parser_list.add_argument('apex', type=str, help='APEX file')
  parser_list.add_argument(
      '--size', help='also show the size of the files', action='store_true')
  parser_list.add_argument(
      '--extents', help='also show the location of the files', action='store_true')
  parser_list.add_argument('-Z', '--contexts',
                           help='also show the security context of the files',
                           action='store_true')
  parser_list.set_defaults(func=RunList)

  parser_extract = subparsers.add_parser('extract', help='extracts content of an APEX to the given '
                                                         'directory')
  parser_extract.add_argument('apex', type=str, help='APEX file')
  parser_extract.add_argument('dest', type=str, help='Directory to extract content of APEX to')
  parser_extract.set_defaults(func=RunExtract)

  parser_info = subparsers.add_parser('info', help='prints APEX manifest')
  parser_info.add_argument('apex', type=str, help='APEX file')
  parser_info.add_argument('--print-type',
                           help='Prints type of the apex (COMPRESSED or UNCOMPRESSED)',
                           action='store_true')
  parser_info.add_argument('--print-payload-type',
                           help='Prints filesystem type of the apex payload',
                           action='store_true')
  parser_info.set_defaults(func=RunInfo)

  # Handle sub-command "decompress"
  parser_decompress = subparsers.add_parser('decompress',
                                            help='decompresses a compressed '
                                                 'APEX')
  parser_decompress.add_argument('--input', type=str, required=True,
                                 help='path to compressed APEX file that '
                                      'will be decompressed')
  parser_decompress.add_argument('--output', type=str, required=True,
                                 help='path to the output APEX file')
  parser_decompress.add_argument('--copy-if-uncompressed',
                                 help='just copy the input if not compressed',
                                 action='store_true')
  parser_decompress.set_defaults(func=RunDecompress)

  args = parser.parse_args(argv)

  debugfs_required_for_cmd = ['list', 'extract']
  if args.cmd in debugfs_required_for_cmd and not args.debugfs_path:
    print('ANDROID_HOST_OUT environment variable is not defined, --debugfs_path must be set',
          file=sys.stderr)
    sys.exit(1)

  if args.cmd == 'extract':
    if not args.fsckerofs_path:
      print('ANDROID_HOST_OUT environment variable is not defined, --fsckerofs_path must be set',
            file=sys.stderr)
      sys.exit(1)

    if not os.path.isfile(args.fsckerofs_path):
      print(f'Cannot find fsck.erofs specified at {args.fsckerofs_path}',
            file=sys.stderr)
      sys.exit(1)

  args.func(args)


if __name__ == '__main__':
  main(sys.argv[1:])