• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2# Copyright (C) 2016 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16"""Parser for Android's version script information."""
17from __future__ import annotations
18
19from dataclasses import dataclass, field
20import logging
21import re
22from typing import (
23    Dict,
24    Iterable,
25    Iterator,
26    List,
27    Mapping,
28    NewType,
29    Optional,
30    TextIO,
31    Tuple,
32    Union,
33)
34
35
# Maps an API codename (like "O") to its numeric API level.
ApiMap = Mapping[str, int]
# Distinct string types so that architecture names and tags are not mixed up
# by the type checker.
Arch = NewType('Arch', str)
Tag = NewType('Tag', str)


# Every architecture name that is recognized as an architecture tag.
ALL_ARCHITECTURES = tuple(
    Arch(arch_name)
    for arch_name in ('arm', 'arm64', 'riscv64', 'x86', 'x86_64')
)


# Arbitrary magic number. We use the same one in api-level.h for this purpose.
FUTURE_API_LEVEL = 10000
52
53
def logger() -> logging.Logger:
    """Look up and return the logger named after this module."""
    module_logger = logging.getLogger(__name__)
    return module_logger
57
58
@dataclass
class Tags:
    """Holds the tags attached to a single symbol or version block."""

    # The individual tags; an empty tuple when nothing is tagged.
    tags: tuple[Tag, ...] = field(default_factory=tuple)

    @classmethod
    def from_strs(cls, strs: Iterable[str]) -> Tags:
        """Builds a Tags object from plain strings.

        The strings are wrapped as-is; API levels are not decoded.
        """
        wrapped = [Tag(s) for s in strs]
        return Tags(tuple(wrapped))

    def __contains__(self, tag: Union[Tag, str]) -> bool:
        """Supports `tag in tags` by delegating to the underlying tuple."""
        return tag in self.tags

    def __iter__(self) -> Iterator[Tag]:
        """Yields each tag in stored order."""
        for tag in self.tags:
            yield tag

    @property
    def has_mode_tags(self) -> bool:
        """Returns True if any mode tag (apex, llndk, systemapi) is set."""
        return any((
            self.has_apex_tags,
            self.has_llndk_tags,
            self.has_systemapi_tags,
        ))

    @property
    def has_apex_tags(self) -> bool:
        """Returns True if the 'apex' tag is set."""
        return 'apex' in self.tags

    @property
    def has_systemapi_tags(self) -> bool:
        """Returns True if the 'systemapi' tag is set."""
        return 'systemapi' in self.tags

    @property
    def has_llndk_tags(self) -> bool:
        """Returns True if the 'llndk' tag is set."""
        return 'llndk' in self.tags

    @property
    def has_platform_only_tags(self) -> bool:
        """Returns True if the 'platform-only' tag is set."""
        return 'platform-only' in self.tags
103
104
@dataclass
class Symbol:
    """One symbol entry read from a symbol file."""

    # The symbol's name as written before the terminating ';'.
    name: str
    # Tags taken from the trailing '#' comment on the symbol's line.
    tags: Tags
111
112
@dataclass
class Version:
    """One version block read from a symbol file."""

    # Name written before the block's opening brace.
    name: str
    # Name following the closing brace ('} BASE;'), or None when absent.
    base: Optional[str]
    # Tags taken from the '#' comment on the block's opening line.
    tags: Tags
    # Symbols collected from the block.
    symbols: List[Symbol]

    @property
    def is_private(self) -> bool:
        """Returns True for private (platform only) version blocks.

        A block counts as private when its name ends in '_PRIVATE' or
        '_PLATFORM'.
        """
        return self.name.endswith(('_PRIVATE', '_PLATFORM'))
126
127
def get_tags(line: str, api_map: ApiMap) -> Tags:
    """Returns the tags found in the trailing comment of this line.

    Everything after the first '#' is split on whitespace into individual
    tags, and each tag is passed through decode_api_level_tag with api_map.
    """
    comment = line.strip().partition('#')[2]
    decoded = [
        decode_api_level_tag(Tag(word), api_map)
        for word in re.split(r'\s+', comment)
        if word.strip()
    ]
    return Tags(tuple(decoded))
135
136
def is_api_level_tag(tag: Tag) -> bool:
    """Returns true if this tag carries an API level that may need decoding.

    That is the case for tags starting with 'introduced=', 'introduced-' or
    'versioned='.
    """
    api_level_prefixes = ('introduced=', 'introduced-', 'versioned=')
    return tag.startswith(api_level_prefixes)
146
147
def decode_api_level(api: str, api_map: ApiMap) -> int:
    """Converts an API level argument into its numeric API level.

    Numeric strings are simply parsed as integers. The special name
    "current" maps to FUTURE_API_LEVEL, and any other name is treated as an
    API codename (like "O") and looked up in api_map.

    Raises:
        KeyError: The name is neither numeric, "current", nor in api_map.
    """
    try:
        return int(api)
    except ValueError:
        # Not a number, so it must be a name; resolved below so that a
        # failed lookup is not chained to this ValueError.
        pass

    return FUTURE_API_LEVEL if api == "current" else api_map[api]
164
165
def decode_api_level_tag(tag: Tag, api_map: ApiMap) -> Tag:
    """Replaces an API codename in a tag with its numeric API level.

    Tags that are not API level tags are returned unchanged. Otherwise the
    tag is rebuilt as 'key=<number>'.

    Raises:
        ParseError: An unknown version name was found in a tag.
        ValueError: An API level tag has no '=' (raised by split_tag).
    """
    if is_api_level_tag(tag):
        name, value = split_tag(tag)
        try:
            level = decode_api_level(value, api_map)
        except KeyError as ex:
            raise ParseError(f'Unknown version name in tag: {tag}') from ex
        return Tag(f'{name}={level}')

    return tag
181
182
def split_tag(tag: Tag) -> Tuple[str, str]:
    """Splits a key/value tag at its first '='.

    Raises:
        ValueError: Tag is not a key/value type tag (it contains no '=').

    Returns: Tuple of (key, value) of the tag. Both components are strings;
        any further '=' characters stay in the value.
    """
    key, separator, value = tag.partition('=')
    if not separator:
        raise ValueError('Not a key/value tag: ' + tag)
    return key, value
195
196
def get_tag_value(tag: Tag) -> str:
    """Extracts the value half of a key/value tag.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Value part of tag as a string.
    """
    _key, value = split_tag(tag)
    return value
206
class Filter:
    """Decides whether a version block or a symbol should be omitted.

    The decision depends on the target architecture and API level, and on
    which modes (LL-NDK, APEX, system API, NDK) are enabled.
    """

    def __init__(
        self,
        arch: Arch,
        api: int,
        llndk: bool = False,
        apex: bool = False,
        systemapi: bool = False,
        ndk: bool = True,
    ):
        self.arch = arch
        self.api = api
        self.llndk = llndk
        self.apex = apex
        self.systemapi = systemapi
        self.ndk = ndk

    def _should_omit_tags(self, tags: Tags) -> bool:
        """Returns True if the tagged object should be omitted.

        These are the rules shared by version tagging and symbol tagging.
        """
        # The apex, llndk and systemapi tags only exclude APIs from other
        # modes: a mode-tagged object is kept exactly when one of its modes
        # is enabled on this filter.
        if tags.has_mode_tags:
            wanted_by_mode = (
                (self.apex and tags.has_apex_tags)
                or (self.llndk and tags.has_llndk_tags)
                or (self.systemapi and tags.has_systemapi_tags)
            )
            return not wanted_by_mode

        # Without mode tags we fall back to the default behavior, because
        # all NDK symbols are implicitly available to APEX and LLNDK: keep
        # the object only if it exists for this architecture and API level.
        available = (
            symbol_in_arch(tags, self.arch)
            and symbol_in_api(tags, self.arch, self.api)
        )
        return not available

    def should_omit_version(self, version: Version) -> bool:
        """Returns True if the version section should be omitted.

        We want to omit any sections that do not have any symbols we'll have
        in the stub library: private or platform-only sections, and sections
        that contain entirely future symbols or only symbols for certain
        architectures.
        """
        if version.is_private or version.tags.has_platform_only_tags:
            return True
        return self._should_omit_tags(version.tags)

    def should_omit_symbol(self, symbol: Symbol) -> bool:
        """Returns True if the symbol should be omitted."""
        tags = symbol.tags
        # Symbols that don't have mode tags are NDK. They are usually
        # included, but have to be omitted if NDK symbols are explicitly
        # filtered out.
        if not (tags.has_mode_tags or self.ndk):
            return True

        return self._should_omit_tags(tags)
266
def symbol_in_arch(tags: Tags, arch: Arch) -> bool:
    """Returns true if the symbol is present for the given architecture.

    A symbol tagged with the requested architecture is always present. A
    symbol with no architecture tags at all is present everywhere, while a
    symbol tagged only with other architectures is absent.
    """
    seen = tuple(tags)
    if arch in seen:
        return True

    # If there were any arch tags, the symbol is only available for the
    # tagged architectures, and ours was not among them.
    return not any(tag in ALL_ARCHITECTURES for tag in seen)
280
281
def symbol_in_api(tags: Iterable[Tag], arch: Arch, api: int) -> bool:
    """Returns true if the symbol is present for the given API level.

    A 'future' tag makes the symbol present only at FUTURE_API_LEVEL.
    Otherwise the symbol is present once api reaches its "introduced" level;
    a symbol with no "introduced" tag has always been available.
    """
    arch_prefix = 'introduced-' + arch + '='
    common_tag = None
    arch_tag = None
    for tag in tags:
        if tag == 'future':
            return api == FUTURE_API_LEVEL
        if tag.startswith(arch_prefix):
            arch_tag = tag
        elif tag.startswith('introduced='):
            common_tag = tag

    # If there is an arch-specific tag, it overrides the common one.
    introduced_tag = common_tag if arch_tag is None else arch_tag
    if introduced_tag is None:
        return True

    introduced_level = int(get_tag_value(introduced_tag))
    return api >= introduced_level
302
303
def symbol_versioned_in_api(tags: Iterable[Tag], api: int) -> bool:
    """Returns true if the symbol should be versioned for the given API.

    This models the `versioned=API` tag. This should be a very uncommonly
    needed tag, and is really only needed to fix versioning mistakes that are
    already out in the wild.

    For example, some of libc's __aeabi_* functions were originally placed in
    the private version, but that was incorrect. They are now in LIBC_N, but
    when building against any version prior to N we need the symbol to be
    unversioned (otherwise it won't resolve on M where it is private).

    Only the first `versioned=` tag is consulted.
    """
    versioned_tag = next(
        (tag for tag in tags if tag.startswith('versioned=')), None)
    if versioned_tag is None:
        # If there is no "versioned" tag, the symbol has been versioned for
        # as long as it was introduced.
        return True
    return api >= int(get_tag_value(versioned_tag))
322
323
class ParseError(RuntimeError):
    """Raised when a symbol file cannot be parsed.

    Examples are unexpected top-level contents, an unterminated block, or an
    unknown version name in a tag.
    """
326
327
class MultiplyDefinedSymbolError(RuntimeError):
    """A symbol name was multiply defined.

    The names are joined with ', ' into the error message and also kept on
    the multiply_defined_symbols attribute.
    """
    def __init__(self, multiply_defined_symbols: Iterable[str]) -> None:
        # A one-shot iterator (one whose iter() is itself, e.g. a generator)
        # would be drained by the join below and leave the attribute empty,
        # so snapshot it first. Re-iterable containers are stored as given.
        if iter(multiply_defined_symbols) is multiply_defined_symbols:
            multiply_defined_symbols = list(multiply_defined_symbols)

        names = ', '.join(multiply_defined_symbols)
        super().__init__(
            f'Version script contains multiple definitions for: {names}')
        self.multiply_defined_symbols = multiply_defined_symbols
335
336
class SymbolFileParser:
    """Parses NDK symbol files.

    The file is read one significant line at a time (blank lines and lines
    starting with '#' are skipped). Tags are taken from the trailing '#'
    comment of version and symbol lines.
    """
    def __init__(self, input_file: TextIO, api_map: ApiMap, filt: Filter) -> None:
        self.input_file = input_file
        self.api_map = api_map
        self.filter = filt
        # The line most recently returned by next_line(); '' after EOF.
        self.current_line: Optional[str] = None

    def parse(self) -> List[Version]:
        """Parses the symbol file and returns a list of Version objects.

        Raises:
            ParseError: The file contains something other than version
                blocks at top level, or a block is malformed.
            MultiplyDefinedSymbolError: A symbol that survives filtering is
                defined more than once.
        """
        versions: List[Version] = []
        while self.next_line():
            assert self.current_line is not None
            if '{' not in self.current_line:
                raise ParseError(
                    f'Unexpected contents at top level: {self.current_line}')
            versions.append(self.parse_version())

        self.check_no_duplicate_symbols(versions)
        return versions

    def check_no_duplicate_symbols(self, versions: Iterable[Version]) -> None:
        """Raises errors for multiply defined symbols.

        This situation is the normal case when symbol versioning is actually
        used, but this script doesn't currently handle that. The error
        message would otherwise be a not necessarily obvious
        "error: redefinition of 'foo'" from stub.c, so it's better for us to
        catch this situation and raise a better error.

        Versions and symbols that the filter omits are not considered.

        Raises:
            MultiplyDefinedSymbolError: Carries the sorted duplicate names.
        """
        seen_names = set()
        duplicate_names = set()
        for version in versions:
            if self.filter.should_omit_version(version):
                continue

            for symbol in version.symbols:
                if self.filter.should_omit_symbol(symbol):
                    continue

                if symbol.name in seen_names:
                    duplicate_names.add(symbol.name)
                seen_names.add(symbol.name)

        if duplicate_names:
            raise MultiplyDefinedSymbolError(sorted(duplicate_names))

    def parse_version(self) -> Version:
        """Parses a single version section and returns a Version object.

        Expects current_line to hold the line that opens the block. Only
        symbols in global scope and outside 'extern "C++"' blocks are
        collected.

        Raises:
            ParseError: A closing brace is not followed by ';', a visibility
                label is unknown, a symbol line is malformed, or the file
                ends inside the block.
        """
        assert self.current_line is not None
        name = self.current_line.split('{')[0].strip()
        tags = get_tags(self.current_line, self.api_map)
        symbols: List[Symbol] = []
        global_scope = True
        cpp_symbols = False
        while self.next_line():
            line = self.current_line
            if '}' in line:
                # Line is something like '} BASE; # tags'. Both base and tags
                # are optional here.
                base = line.partition('}')[2].partition('#')[0].strip()
                if not base.endswith(';'):
                    raise ParseError(
                        'Unterminated version/export "C++" block (expected ;).')
                if cpp_symbols:
                    # This brace only closes the nested 'extern "C++"' block;
                    # the version block itself continues.
                    cpp_symbols = False
                else:
                    base = base.rstrip(';').rstrip()
                    return Version(name, base or None, tags, symbols)
            elif 'extern "C++" {' in line:
                cpp_symbols = True
            elif not cpp_symbols and ':' in line:
                visibility = line.split(':')[0].strip()
                if visibility == 'local':
                    global_scope = False
                elif visibility == 'global':
                    global_scope = True
                else:
                    raise ParseError('Unknown visiblity label: ' + visibility)
            elif global_scope and not cpp_symbols:
                symbols.append(self.parse_symbol())
            else:
                # We're in a hidden scope or in 'extern "C++"' block. Ignore
                # everything.
                pass
        raise ParseError('Unexpected EOF in version block.')

    def parse_symbol(self) -> Symbol:
        """Parses a single symbol line and returns a Symbol object.

        Raises:
            ParseError: The line has no ';' or contains a '*' wildcard.
        """
        assert self.current_line is not None
        line = self.current_line
        if ';' not in line:
            raise ParseError(
                'Expected ; to terminate symbol: ' + line)
        if '*' in line:
            raise ParseError(
                'Wildcard global symbols are not permitted.')
        # Line is now in the format "<symbol-name>; # tags". Whitespace
        # between the name and the ';' is not part of the name, so drop it
        # just like the version name, base and visibility label are stripped.
        name = line.strip().partition(';')[0].rstrip()
        tags = get_tags(line, self.api_map)
        return Symbol(name, tags)

    def next_line(self) -> str:
        """Returns the next non-empty non-comment line.

        The returned line is also stored in current_line. A return value of
        '' indicates EOF.
        """
        while True:
            line = self.input_file.readline()
            if not line:
                # readline() returns '' only at EOF; blank lines still carry
                # their newline.
                break
            stripped = line.strip()
            if stripped and not stripped.startswith('#'):
                break
        self.current_line = line
        return self.current_line
453