• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright (C) 2019 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""deapexer is a tool that prints out content of an APEX.
17
18To print content of an APEX to stdout:
19  deapexer list foo.apex
20
21To extract content of an APEX to the given directory:
22  deapexer extract foo.apex dest
23"""
24from __future__ import print_function
25
26import argparse
27import apex_manifest
28import enum
29import os
30import re
31import shutil
32import sys
33import subprocess
34import tempfile
35import zipfile
36
# Size in bytes of one filesystem block. debugfs reports extent addresses and
# lengths in blocks; they are multiplied by this value to get byte values.
BLOCK_SIZE = 4096

# See apexd/apex_file.cpp#RetrieveFsType
# Each entry is (filesystem name, byte offset of the magic within the image,
# magic bytes expected at that offset). Entries are probed in list order.
FS_TYPES = [
    ('f2fs', 1024, b'\x10\x20\xf5\xf2'),
    # The escapes below are octal: the two bytes are 0x53 0xEF.
    ('ext4', 1024 + 0x38, b'\123\357'),
    ('erofs', 1024, b'\xe2\xe1\xf5\xe0'),
]
45
46
def RetrieveFileSystemType(file):
  """Detects the filesystem of an image by probing for known magic numbers.

  Args:
    file: path to the filesystem image to inspect.

  Returns:
    The name of the first FS_TYPES entry whose magic bytes are found at the
    entry's offset in the image.

  Raises:
    ValueError: if none of the known magics is present.
  """
  with open(file, 'rb') as image:

    def HasMagicAt(position, expected):
      # Read exactly len(expected) bytes (fewer if the image is too short,
      # in which case the remaining probe bytes stay zero).
      probe = bytearray(len(expected))
      image.seek(position, os.SEEK_SET)
      image.readinto(probe)
      return probe == expected

    # The generator is lazy, so probing stops at the first match and happens
    # while the image is still open.
    candidates = (name for name, position, expected in FS_TYPES
                  if HasMagicAt(position, expected))
    detected = next(candidates, None)

  if detected is None:
    raise ValueError('Failed to retrieve filesystem type')
  return detected
57
class ApexImageEntry(object):
  """Represents an entry (file, directory or symlink) in APEX payload.

  All constructor arguments except `name` are keyword-only and are stored
  as-is; they are exposed through read-only properties.
  """

  def __init__(self, name, *, base_dir, permissions, size, ino, extents,
               is_directory, is_symlink, security_context):
    """Stores the metadata of one payload entry.

    Args:
      name: entry name relative to `base_dir` ('.' for the directory itself).
      base_dir: directory containing the entry, with a trailing '/'.
      permissions: permission bits as an integer (e.g. 0o755).
      size: entry size; may be an int or its string representation.
      ino: inode identifier of the entry.
      extents: list of (offset, length) tuples; empty when unknown.
      is_directory: True if the entry is a directory.
      is_symlink: True if the entry is a symbolic link.
      security_context: value of the entry's 'security.selinux' attribute.
    """
    self._name = name
    self._base_dir = base_dir
    self._permissions = permissions
    self._size = size
    self._is_directory = is_directory
    self._is_symlink = is_symlink
    self._ino = ino
    self._extents = extents
    self._security_context = security_context
    # Child entries; populated by the code that walks the payload.
    self._entries = []

  @property
  def name(self):
    return self._name

  @property
  def root(self):
    """True only for the '.' entry of the top-level directory './'."""
    return self._base_dir == './' and self._name == '.'

  @property
  def full_path(self):
    """Path of the entry inside the payload; directories end with '/'."""
    if self.root:
      return self._base_dir  # './'
    path = os.path.join(self._base_dir, self._name)
    if self.is_directory:
      path += '/'
    return path

  @property
  def is_directory(self):
    return self._is_directory

  @property
  def is_symlink(self):
    return self._is_symlink

  @property
  def is_regular_file(self):
    return not self.is_directory and not self.is_symlink

  @property
  def permissions(self):
    return self._permissions

  @property
  def size(self):
    return self._size

  @property
  def ino(self):
    return self._ino

  @property
  def entries(self):
    return self._entries

  @property
  def extents(self):
    return self._extents

  @property
  def security_context(self):
    return self._security_context

  def __str__(self):
    """Returns an `ls -l`-like summary: '<type><rwx bits> <size> <name>'."""
    if self._is_directory:
      kind = 'd'
    elif self._is_symlink:
      kind = 'l'
    else:
      kind = '-'

    def MaskAsString(m):
      # Only the three lowest bits of `m` are inspected, so higher bits
      # (left over after shifting) are harmless.
      return (('r' if m & 4 else '-') +
              ('w' if m & 2 else '-') +
              ('x' if m & 1 else '-'))

    mode = (MaskAsString(self._permissions >> 6) +
            MaskAsString((self._permissions >> 3) & 7) +
            MaskAsString(self._permissions & 7))

    # Format instead of concatenating so that an integer size does not raise
    # TypeError.
    return f'{kind}{mode} {self._size} {self._name}'
146
147
class Apex(object):
  """Represents an APEX file.

  On construction the payload image ('apex_payload.img') is extracted from the
  APEX zip into a private temporary directory and its filesystem type is
  detected. The temporary directory is removed when a `with` block using the
  object exits, or, as a fallback, when the object is garbage collected.
  """

  def __init__(self, args):
    """Extracts the payload of `args.apex` and detects its filesystem type.

    Args:
      args: object with `debugfs_path`, `fsckerofs_path` and `apex`
        attributes (e.g. the parsed command line).
    """
    # Set first so that cleanup is safe even if a line below raises.
    self._tempdir = None
    self._debugfs = args.debugfs_path
    self._fsckerofs = args.fsckerofs_path
    self._apex = args.apex
    self._tempdir = tempfile.mkdtemp()
    try:
      with zipfile.ZipFile(self._apex, 'r') as zip_ref:
        self._payload = zip_ref.extract('apex_payload.img', path=self._tempdir)
      self._payload_fs_type = RetrieveFileSystemType(self._payload)
    except Exception:
      # Do not leave the temporary directory behind when construction fails.
      self._cleanup()
      raise

  def _cleanup(self):
    """Removes the temporary directory; safe to call more than once."""
    tempdir = getattr(self, '_tempdir', None)
    if tempdir:
      self._tempdir = None
      shutil.rmtree(tempdir, ignore_errors=True)

  def __del__(self):
    # Fallback for callers that do not use the object as a context manager.
    self._cleanup()

  def __enter__(self):
    return self

  def __exit__(self, ex_type, value, traceback):
    # Release the extracted payload deterministically instead of waiting for
    # garbage collection. Exceptions from the `with` body are not suppressed.
    self._cleanup()

  def _run_debugfs(self, request):
    """Runs one debugfs request against the payload and returns its stdout."""
    return subprocess.check_output(
        [self._debugfs, '-R', request, self._payload],
        text=True, stderr=subprocess.DEVNULL)

  def list(self):
    """Yields every entry of the payload; exits if the payload is not ext4."""
    if self._payload_fs_type not in ['ext4']:
      sys.exit(f'{self._payload_fs_type} is not supported for `list`.')

    yield from self.entries()

  def _read_extents(self, ino, size):
    """Returns the (offset, length) byte extents of a regular file.

    Args:
      ino: inode identifier as printed by debugfs.
      size: file size in bytes (string or int).

    Returns:
      A list of (offset, length) tuples, or [] when the location of the file
      could not be retrieved reliably.
    """
    stdout = self._run_debugfs(f'dump_extents <{ino}>')
    # Output of dump_extents for an inode fragmented in 3 blocks (length and addresses represent
    # block-sized sections):
    # Level Entries       Logical      Physical Length Flags
    # 0/ 0   1/  3     0 -     0    18 -    18      1
    # 0/ 0   2/  3     1 -    15    20 -    34     15
    # 0/ 0   3/  3    16 -  1863    37 -  1884   1848
    # The first line contains only column names; slicing (rather than pop)
    # also copes with empty output.
    rows = stdout.splitlines()[1:]
    left_length = int(size)
    extents = []
    try:  # dump_extents sometimes has an unexpected output
      for row in rows:
        tokens = row.split()
        offset = int(tokens[7]) * BLOCK_SIZE
        # The last extent may cover more than the remaining file bytes.
        length = min(int(tokens[-1]) * BLOCK_SIZE, left_length)
        left_length -= length
        extents.append((offset, length))
    except (ValueError, IndexError):
      return []  # [] means that we failed to retrieve the file location successfully
    if left_length != 0:  # dump_extents sometimes fails to display "hole" blocks
      return []
    return extents

  def read_dir(self, path) -> 'ApexImageEntry':
    """Recursively reads a directory of an ext4 payload using debugfs.

    Args:
      path: directory path inside the payload; must end with '/'.

    Returns:
      The entry for `path` itself ('.'), whose `entries` holds its children
      sorted by name; sub-directories are populated recursively.
    """
    assert path.endswith('/')
    assert self.payload_fs_type == 'ext4'

    listing = self._run_debugfs(f'ls -l -p {path}')
    dir_entry = None
    entries = []
    for line in listing.split('\n'):
      if not line:
        continue
      # Only lines that split into exactly 8 '/'-separated fields are
      # entries; field 1 is the inode, 2 the mode, 5 the name, 6 the size.
      parts = line.split('/')
      if len(parts) != 8:
        continue
      name = parts[5]
      if not name:
        continue
      if name == '..':
        continue
      if name == 'lost+found' and path == './':
        continue
      ino = parts[1]
      bits = parts[2]
      size = parts[6]
      # The second character of the mode string encodes the file type.
      is_symlink = bits[1] == '2'
      is_directory = bits[1] == '4'

      extents = []
      if not is_symlink and not is_directory:
        extents = self._read_extents(ino, size)

      # get 'security.selinux' attribute
      entry_path = os.path.join(path, name)
      stdout = self._run_debugfs(f'ea_get -V {entry_path} security.selinux')
      security_context = stdout.rstrip('\n\x00')

      entry = ApexImageEntry(name,
                             base_dir=path,
                             permissions=int(bits[3:], 8),
                             size=size,
                             is_directory=is_directory,
                             is_symlink=is_symlink,
                             ino=ino,
                             extents=extents,
                             security_context=security_context)
      if name == '.':
        dir_entry = entry
      elif is_directory:
        sub_dir_entry = self.read_dir(path + name + '/')
        # sub_dir_entry should be the same inode
        assert entry.ino == sub_dir_entry.ino
        entry.entries.extend(sub_dir_entry.entries)
        entries.append(entry)
      else:
        entries.append(entry)

    assert dir_entry
    dir_entry.entries.extend(sorted(entries, key=lambda e: e.name))
    return dir_entry

  def extract(self, dest):
    """Recursively dumps contents of the payload with retaining mode bits, but not owner/group"""
    if self._payload_fs_type == 'erofs':
      subprocess.run([self._fsckerofs, f'--extract={dest}', '--overwrite',
                     '--no-preserve-owner', self._payload], stdout=subprocess.DEVNULL, check=True)
    elif self._payload_fs_type == 'ext4':
      # Extract entries one by one using `dump` because `rdump` doesn't support
      # "no-preserve" mode
      for entry in self.entries():
        self.write_entry(entry, dest)
    else:
      # TODO(b/279688635) f2fs is not supported yet.
      sys.exit(f'{self._payload_fs_type} is not supported for `extract`.')

  @property
  def payload_fs_type(self) -> str:
    return self._payload_fs_type

  def entries(self):
    """Generator to visit all entries in the payload starting from root(./)"""

    def TopDown(entry):
      # Parents are yielded before their children.
      yield entry
      for child in entry.entries:
        yield from TopDown(child)

    root = self.read_dir('./')
    yield from TopDown(root)

  def read_symlink(self, entry):
    """Returns the target of a symlink entry of an ext4 payload."""
    assert entry.is_symlink
    assert self.payload_fs_type == 'ext4'

    stdout = self._run_debugfs(f'stat {entry.full_path}')
    # Output of stat for a symlink should have the following line:
    #   Fast link dest: \"%.*s\"
    m = re.search(r'\bFast link dest: \"(.+)\"\n', stdout)
    if m:
      return m.group(1)

    # if above match fails, it means it's a slow link. Use cat.
    output = self._run_debugfs(f'cat {entry.full_path}')

    if not output:
      sys.exit('failed to read symlink target')
    return output

  def write_entry(self, entry, out_dir):
    """Materializes one payload entry under `out_dir`.

    Directories are created if missing, symlinks are re-created with the same
    target, and regular files are dumped with debugfs and then given the
    entry's permission bits.
    """
    dest = os.path.normpath(os.path.join(out_dir, entry.full_path))
    if entry.is_directory:
      if not os.path.exists(dest):
        os.makedirs(dest, mode=0o755)
    elif entry.is_symlink:
      os.symlink(self.read_symlink(entry), dest)
    else:
      self._run_debugfs(f'dump {entry.full_path} {dest}')
      # retain mode bits
      os.chmod(dest, entry.permissions)
321
322
def RunList(args):
  """Prints the entries of the APEX at `args.apex`, one per line.

  A compressed APEX is first decompressed into a temporary directory and
  `args.apex` is redirected to the decompressed copy. Each line holds the
  entry's full path, optionally preceded by its size (`args.size`) and
  followed by its extents (`args.extents`) and security context
  (`args.contexts`).
  """
  if GetType(args.apex) == ApexType.COMPRESSED:
    with tempfile.TemporaryDirectory() as workdir:
      uncompressed = os.path.join(workdir, 'temp.apex')
      Decompress(args.apex, uncompressed)
      args.apex = uncompressed

      RunList(args)
    return

  with Apex(args) as apex:
    for entry in apex.list():
      # dot(., ..) directories are skipped, except for the payload root
      is_dot_dir = entry.name in ('.', '..')
      if is_dot_dir and not entry.root:
        continue

      columns = []
      if args.size:
        columns.append(entry.size)
      columns.append(entry.full_path)
      if args.extents:
        columns.append('[' + '-'.join(str(x) for x in entry.extents) + ']')
      if args.contexts:
        columns.append(entry.security_context)
      print(' '.join(columns))
347
348
def RunExtract(args):
  """Extracts the payload of the APEX at `args.apex` into `args.dest`.

  A compressed APEX is first decompressed into a temporary directory and
  `args.apex` is redirected to the decompressed copy. The destination
  directory is created if it does not exist, and a 'lost+found' directory
  left in it after extraction is removed.
  """
  if GetType(args.apex) == ApexType.COMPRESSED:
    with tempfile.TemporaryDirectory() as workdir:
      uncompressed = os.path.join(workdir, 'temp.apex')
      Decompress(args.apex, uncompressed)
      args.apex = uncompressed

      RunExtract(args)
    return

  with Apex(args) as apex:
    if not os.path.exists(args.dest):
      os.makedirs(args.dest, mode=0o755)
    apex.extract(args.dest)

    lost_found = os.path.join(args.dest, 'lost+found')
    if os.path.isdir(lost_found):
      shutil.rmtree(lost_found)
365
@enum.unique
class ApexType(enum.Enum):
  """Kind of an APEX archive, as classified from the names of its zip members."""
  # Neither a plain nor a compressed APEX (or ambiguous).
  INVALID = 0
  # Contains the payload image directly.
  UNCOMPRESSED = 1
  # Wraps the original APEX as a zip member.
  COMPRESSED = 2
370
371
def GetType(apex_path):
  """Classifies the zip archive at `apex_path` by the members it contains.

  Returns:
    ApexType.UNCOMPRESSED if only 'apex_payload.img' is present,
    ApexType.COMPRESSED if only 'original_apex' is present, and
    ApexType.INVALID if both or neither are present.
  """
  with zipfile.ZipFile(apex_path, 'r') as archive:
    members = archive.namelist()

  payload_present = 'apex_payload.img' in members
  original_present = 'original_apex' in members

  # Exactly one of the two markers must be present for a valid APEX.
  if payload_present == original_present:
    return ApexType.INVALID
  return ApexType.UNCOMPRESSED if payload_present else ApexType.COMPRESSED
384
385
def RunInfo(args):
  """Prints information about the APEX at `args.apex`.

  With `args.print_type` the APEX type name is printed (the process exits
  with status 1 for an invalid APEX); with `args.print_payload_type` the
  payload filesystem type is printed; otherwise the manifest is printed as
  JSON.
  """
  if args.print_type:
    apex_type = GetType(args.apex)
    if apex_type == ApexType.INVALID:
      print(args.apex + ' is not a valid apex')
      sys.exit(1)
    print(apex_type.name)
    return

  if args.print_payload_type:
    print(Apex(args).payload_fs_type)
    return

  print(apex_manifest.toJsonString(apex_manifest.fromApex(args.apex)))
398
399
def RunDecompress(args):
  """Decompresses a compressed APEX into the original uncompressed APEX.

  When the input is already uncompressed and `args.copy_if_uncompressed` is
  set, the input is copied to the output path unchanged instead.

  See apex_compression_tool.py#RunCompress for details on compressed APEX
  structure.

  Args:
      args.input: file path to compressed APEX
      args.output: file path to where decompressed APEX will be placed
      args.copy_if_uncompressed: copy instead of failing on uncompressed input
  """
  already_uncompressed = GetType(args.input) == ApexType.UNCOMPRESSED
  if already_uncompressed and args.copy_if_uncompressed:
    shutil.copyfile(args.input, args.output)
    return

  return Decompress(args.input, args.output)
418
419
def Decompress(compressed_apex_fp, decompressed_apex_fp):
  """Extracts the 'original_apex' member of a compressed APEX.

  Args:
    compressed_apex_fp: path to the compressed APEX (a zip archive).
    decompressed_apex_fp: path the original APEX is written to; must not
      exist yet.

  Exits the process with status 1 (after printing a message) if the output
  path already exists or the archive has no 'original_apex' member.
  """
  if os.path.exists(decompressed_apex_fp):
    print("Output path '" + decompressed_apex_fp + "' already exists")
    sys.exit(1)

  out_dir, out_name = os.path.split(decompressed_apex_fp)

  with zipfile.ZipFile(compressed_apex_fp, 'r') as archive:
    try:
      member = archive.getinfo('original_apex')
    except KeyError:
      member = None
    if member is None:
      print(compressed_apex_fp + ' is not a compressed APEX. Missing '
                                 "'original_apex' file inside it.")
      sys.exit(1)
    # Renaming the member in memory makes extract() write it under the file
    # name the user asked for.
    member.filename = out_name
    archive.extract(member, path=out_dir)
436
437
def _BuildArgumentParser(debugfs_default, fsckerofs_default):
  """Creates the command line parser with all deapexer sub-commands."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--debugfs_path', help='The path to debugfs binary', default=debugfs_default)
  parser.add_argument(
      '--fsckerofs_path', help='The path to fsck.erofs binary', default=fsckerofs_default)
  # TODO(b/279858383) remove the argument
  parser.add_argument('--blkid_path', help='NOT USED')

  subparsers = parser.add_subparsers(required=True, dest='cmd')

  # Sub-command "list"
  list_cmd = subparsers.add_parser(
      'list', help='prints content of an APEX to stdout')
  list_cmd.add_argument('apex', type=str, help='APEX file')
  list_cmd.add_argument(
      '--size', help='also show the size of the files', action='store_true')
  list_cmd.add_argument(
      '--extents', help='also show the location of the files', action='store_true')
  list_cmd.add_argument('-Z', '--contexts',
                        help='also show the security context of the files',
                        action='store_true')
  list_cmd.set_defaults(func=RunList)

  # Sub-command "extract"
  extract_cmd = subparsers.add_parser(
      'extract', help='extracts content of an APEX to the given directory')
  extract_cmd.add_argument('apex', type=str, help='APEX file')
  extract_cmd.add_argument('dest', type=str, help='Directory to extract content of APEX to')
  extract_cmd.set_defaults(func=RunExtract)

  # Sub-command "info"
  info_cmd = subparsers.add_parser('info', help='prints APEX manifest')
  info_cmd.add_argument('apex', type=str, help='APEX file')
  info_cmd.add_argument('--print-type',
                        help='Prints type of the apex (COMPRESSED or UNCOMPRESSED)',
                        action='store_true')
  info_cmd.add_argument('--print-payload-type',
                        help='Prints filesystem type of the apex payload',
                        action='store_true')
  info_cmd.set_defaults(func=RunInfo)

  # Sub-command "decompress"
  decompress_cmd = subparsers.add_parser(
      'decompress', help='decompresses a compressed APEX')
  decompress_cmd.add_argument(
      '--input', type=str, required=True,
      help='path to compressed APEX file that will be decompressed')
  decompress_cmd.add_argument('--output', type=str, required=True,
                              help='path to the output APEX file')
  decompress_cmd.add_argument('--copy-if-uncompressed',
                              help='just copy the input if not compressed',
                              action='store_true')
  decompress_cmd.set_defaults(func=RunDecompress)

  return parser


def main(argv):
  """Parses `argv` (without the program name) and runs the chosen sub-command.

  Tool paths default to binaries under $ANDROID_HOST_OUT when that variable
  is defined. The process exits with status 1 when a tool required by the
  sub-command is not configured, or when fsck.erofs is not a file.
  """
  host_out = os.environ.get('ANDROID_HOST_OUT')
  if host_out is not None:
    debugfs_default = os.path.join(host_out, 'bin/debugfs_static')
    fsckerofs_default = os.path.join(host_out, 'bin/fsck.erofs')
  else:
    debugfs_default = None
    fsckerofs_default = None

  args = _BuildArgumentParser(debugfs_default, fsckerofs_default).parse_args(argv)

  def Fail(message):
    print(message, file=sys.stderr)
    sys.exit(1)

  # Both `list` and `extract` need debugfs.
  if args.cmd in ('list', 'extract') and not args.debugfs_path:
    Fail('ANDROID_HOST_OUT environment variable is not defined, --debugfs_path must be set')

  if args.cmd == 'extract':
    if not args.fsckerofs_path:
      Fail('ANDROID_HOST_OUT environment variable is not defined, --fsckerofs_path must be set')

    if not os.path.isfile(args.fsckerofs_path):
      Fail(f'Cannot find fsck.erofs specified at {args.fsckerofs_path}')

  args.func(args)
517
518
# Run the command line interface only when executed as a script. The program
# name (argv[0]) is dropped so that main() receives just the arguments.
if __name__ == '__main__':
  main(sys.argv[1:])
521