• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#      http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
24import itertools
25import os.path
26import re
27
28try:
29    import bpf
30except ImportError:
31    from minijail import bpf
32
33
34# Representations of numbers with different radix (base) in C.
35HEX_REGEX = r'-?0[xX][0-9a-fA-F]+'
36OCTAL_REGEX = r'-?0[0-7]+'
37DECIMAL_REGEX = r'-?[0-9]+'
38
39
40Token = collections.namedtuple(
41    'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])
42
43# A regex that can tokenize a Minijail policy file line.
44_TOKEN_SPECIFICATION = (
45    ('COMMENT', r'#.*$'),
46    ('WHITESPACE', r'\s+'),
47    ('CONTINUATION', r'\\$'),
48    ('DEFAULT', r'@default\b'),
49    ('INCLUDE', r'@include\b'),
50    ('FREQUENCY', r'@frequency\b'),
51    ('DENYLIST', r'@denylist$'),
52    ('PATH', r'(?:\.)?/\S+'),
53    ('NUMERIC_CONSTANT', f'{HEX_REGEX}|{OCTAL_REGEX}|{DECIMAL_REGEX}'),
54    ('COLON', r':'),
55    ('SEMICOLON', r';'),
56    ('COMMA', r','),
57    ('BITWISE_COMPLEMENT', r'~'),
58    ('LPAREN', r'\('),
59    ('RPAREN', r'\)'),
60    ('LBRACE', r'\{'),
61    ('RBRACE', r'\}'),
62    ('RBRACKET', r'\]'),
63    ('LBRACKET', r'\['),
64    ('OR', r'\|\|'),
65    ('AND', r'&&'),
66    ('BITWISE_OR', r'\|'),
67    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
68    ('EQUAL', r'='),
69    ('ARGUMENT', r'\barg[0-9]+\b'),
70    ('RETURN', r'\breturn\b'),
71    ('ACTION',
72     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
73     r'\btrace\b|\blog\b|\buser-notify\b'
74    ),
75    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
76)
77_TOKEN_RE = re.compile('|'.join(
78    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
79
80
class ParseException(Exception):
    """An exception that is raised when parsing fails.

    The message includes the file name, position, the offending line, and a
    caret marker pointing at the offending span.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        # With a token, underline exactly that token; without one, point one
        # column past the end of the provided line.
        if token:
            line = token.line
            line_number = token.line_number
            column = token.column
            length = len(token.value)
        else:
            column = len(line)
            length = 1

        header = '%s(%d:%d): %s' % (filename, line_number, column + 1,
                                    message)
        pointer = ' ' * column + '^' * length
        super().__init__('\n    '.join((header, line, pointer)))
106
107
class ParserState:
    """Stores the state of the Parser to provide better diagnostics."""

    def __init__(self, filename):
        """Create a ParserState for the given filename."""
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException anchored at the current position.

        If |token| is provided, the error points at that token; otherwise it
        points just past the end of the current line.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Yield the token list of each logical line in |lines|.

        Lines ending in a backslash are merged with the following line(s)
        into one token list. Whitespace, comments, and the continuation
        marker itself are omitted from the result; lines that yield no
        tokens are skipped entirely. Raises a ParseException (via error())
        on any text the token regex does not recognize.
        """
        tokens = []

        for line_number, line in enumerate(lines):
            # Diagnostics use 1-based line numbers.
            self._line_number = line_number + 1
            self._line = line.rstrip('\r\n')

            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                if token.start() != last_end:
                    # Unmatched text between two recognized tokens.
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:token.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this logic
                # elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            if last_end != len(self._line):
                # Unmatched trailing text after the last recognized token.
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be free
                # to modify it.
                yield tokens[::]
            tokens.clear()
183
184
# Parse-tree node types used throughout the parser. Each is an immutable
# namedtuple; the string literal following each definition describes it.
Atom = collections.namedtuple('Atom', ['argument_index', 'op', 'value'])
"""A single boolean comparison within a filter expression."""

Filter = collections.namedtuple('Filter', ['expression', 'action'])
"""The result of parsing a DNF filter expression, with its action.

Since the expression is in Disjunctive Normal Form, it is composed of two levels
of lists, one for disjunctions and the inner one for conjunctions. The elements
of the inner list are Atoms.
"""

Syscall = collections.namedtuple('Syscall', ['name', 'number'])
"""A system call."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ['syscalls', 'filters', 'token'])
"""The result of parsing a filter statement.

Statements have a list of syscalls, and an associated list of filters that will
be evaluated sequentially when any of the syscalls is invoked.
"""

FilterStatement = collections.namedtuple('FilterStatement',
                                         ['syscall', 'frequency', 'filters'])
"""The filter list for a particular syscall.

This is a mapping from one syscall to a list of filters that are evaluated
sequentially. The last filter is always an unconditional action.
"""

ParsedPolicy = collections.namedtuple('ParsedPolicy',
                                      ['default_action', 'filter_statements'])
"""The result of parsing a minijail .policy file."""
218
219
220# pylint: disable=too-few-public-methods
221class PolicyParser:
222    """A parser for the Minijail seccomp policy file format."""
223
224    def __init__(self,
225                 arch,
226                 *,
227                 kill_action,
228                 include_depth_limit=10,
229                 override_default_action=None,
230                 denylist=False,
231                 ret_log=False):
232        self._parser_states = [ParserState("<memory>")]
233        self._kill_action = kill_action
234        self._include_depth_limit = include_depth_limit
235        if denylist:
236            self._default_action = bpf.Allow()
237        else:
238            self._default_action = self._kill_action
239        self._override_default_action = override_default_action
240        self._frequency_mapping = collections.defaultdict(int)
241        self._arch = arch
242        self._denylist = denylist
243        self._ret_log = ret_log
244
    @property
    def _parser_state(self):
        # The state of the innermost file being parsed (top of the @include
        # stack).
        return self._parser_states[-1]
248
249    # single-constant = identifier
250    #                 | numeric-constant
251    #                 ;
252    def _parse_single_constant(self, token):
253        if token.type == 'IDENTIFIER':
254            if token.value not in self._arch.constants:
255                self._parser_state.error('invalid constant', token=token)
256            single_constant = self._arch.constants[token.value]
257        elif token.type == 'NUMERIC_CONSTANT':
258            # As `int(_, 0)` in Python != `strtol(_, _, 0)` in C, to make sure
259            # the number parsing behaves exactly in C, instead of using `int()`
260            # directly, we list out all the possible formats for octal, decimal
261            # and hex numbers, and determine the corresponding base by regex.
262            try:
263                if re.match(HEX_REGEX, token.value):
264                    base = 16
265                elif re.match(OCTAL_REGEX, token.value):
266                    base = 8
267                elif re.match(DECIMAL_REGEX, token.value):
268                    base = 10
269                else:
270                    # This should never happen.
271                    raise ValueError
272                single_constant = int(token.value, base=base)
273            except ValueError:
274                self._parser_state.error('invalid constant', token=token)
275        else:
276            self._parser_state.error('invalid constant', token=token)
277        if single_constant > self._arch.max_unsigned:
278            self._parser_state.error('unsigned overflow', token=token)
279        elif single_constant < self._arch.min_signed:
280            self._parser_state.error('signed underflow', token=token)
281        elif single_constant < 0:
282            # This converts the constant to an unsigned representation of the
283            # same value, since BPF only uses unsigned values.
284            single_constant = self._arch.truncate_word(single_constant)
285        return single_constant
286
    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        # Parse one (possibly complemented, possibly parenthesized) constant,
        # consuming its tokens from the front of |tokens|.
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                # Reject '~~x' outright instead of evaluating it back to x.
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Consumes either the closing parenthesis (LPAREN branch) or the
        # single-constant token itself (else branch).
        tokens.pop(0)
        if negate:
            # Keep the complement within the architecture's word width.
            single_value = self._arch.truncate_word(~single_value)
        return single_value
312
313    # value = constant , [ { '|' , constant } ]
314    #       ;
315    def parse_value(self, tokens):
316        """Parse constants separated bitwise OR operator |.
317
318        Constants can be:
319
320        - A number that can be parsed with strtol() in C.
321        - A named constant expression.
322        - A parenthesized, valid constant expression.
323        - A valid constant expression prefixed with the unary bitwise
324          complement operator ~.
325        - A series of valid constant expressions separated by bitwise
326          OR operator |.
327
328        If there is an error parsing any of the constants, the whole process
329        fails.
330        """
331
332        value = 0
333        while tokens:
334            value |= self._parse_constant(tokens)
335            if not tokens or tokens[0].type != 'BITWISE_OR':
336                break
337            tokens.pop(0)
338        else:
339            self._parser_state.error('empty constant')
340        return value
341
342    # atom = argument , op , value
343    #      ;
344    def _parse_atom(self, tokens):
345        if not tokens:
346            self._parser_state.error('missing argument')
347        argument = tokens.pop(0)
348        if argument.type != 'ARGUMENT':
349            self._parser_state.error('invalid argument', token=argument)
350
351        if not tokens:
352            self._parser_state.error('missing operator')
353        operator = tokens.pop(0)
354        if operator.type != 'OP':
355            self._parser_state.error('invalid operator', token=operator)
356
357        value = self.parse_value(tokens)
358        argument_index = int(argument.value[3:])
359        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
360            self._parser_state.error('invalid argument', token=argument)
361        return Atom(argument_index, operator.value, value)
362
363    # clause = atom , [ { '&&' , atom } ]
364    #        ;
365    def _parse_clause(self, tokens):
366        atoms = []
367        while tokens:
368            atoms.append(self._parse_atom(tokens))
369            if not tokens or tokens[0].type != 'AND':
370                break
371            tokens.pop(0)
372        else:
373            self._parser_state.error('empty clause')
374        return atoms
375
376    # argument-expression = clause , [ { '||' , clause } ]
377    #                   ;
378    def parse_argument_expression(self, tokens):
379        """Parse a argument expression in Disjunctive Normal Form.
380
381        Since BPF disallows back jumps, we build the basic blocks in reverse
382        order so that all the jump targets are known by the time we need to
383        reference them.
384        """
385
386        clauses = []
387        while tokens:
388            clauses.append(self._parse_clause(tokens))
389            if not tokens or tokens[0].type != 'OR':
390                break
391            tokens.pop(0)
392        else:
393            self._parser_state.error('empty argument expression')
394        return clauses
395
396    # default-action = 'kill-process'
397    #                | 'kill-thread'
398    #                | 'kill'
399    #                | 'trap'
400    #                | 'user-notify'
401    #                ;
402    def _parse_default_action(self, tokens):
403        if not tokens:
404            self._parser_state.error('missing default action')
405        action_token = tokens.pop(0)
406        if action_token.type != 'ACTION':
407            return self._parser_state.error(
408                'invalid default action', token=action_token)
409        if action_token.value == 'kill-process':
410            return bpf.KillProcess()
411        if action_token.value == 'kill-thread':
412            return bpf.KillThread()
413        if action_token.value == 'kill':
414            return self._kill_action
415        if action_token.value == 'trap':
416            return bpf.Trap()
417        if action_token.value == 'user-notify':
418            return bpf.UserNotify()
419        return self._parser_state.error(
420            'invalid permissive default action', token=action_token)
421
    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'user-notify'
    #        | 'return' , single-constant
    #        ;
    def parse_action(self, tokens):
        """Parse one action and return the corresponding bpf action object.

        The numeric constant 1 is accepted as an alias for 'allow'. For
        'return <constant>' the result is bpf.ReturnErrno, or bpf.Log when
        the parser was constructed with ret_log=True.
        """
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        # denylist policies must specify a return for every line.
        if self._denylist:
            if action_token.type != 'RETURN':
                self._parser_state.error('invalid denylist policy')

        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'user-notify':
                return bpf.UserNotify()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            if self._ret_log:
                # Log instead of returning the errno, but still consume the
                # return-value token.
                tokens.pop(0)
                return bpf.Log()
            else:
                return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)
471
    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               | '!','(', argument-expression, [ ';', action ], ')'
    #               ;
    def _parse_single_filter(self, tokens):
        # Parse one filter: either a bare action, or an argument expression
        # optionally followed by ';' and an action.
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            # Only argument expressions can start with an ARGUMENT token.
            argument_expression = self.parse_argument_expression(tokens)
            if tokens and tokens[0].type == 'SEMICOLON':
                tokens.pop(0)
                action = self.parse_action(tokens)
            else:
                # An argument expression with no explicit action allows the
                # call.
                action = bpf.Allow()
            return Filter(argument_expression, action)
        else:
            return Filter(None, self.parse_action(tokens))
490
491    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
492    #        | single-filter
493    #        ;
494    def parse_filter(self, tokens):
495        """Parse a filter and return a list of Filter objects."""
496        if not tokens:
497            self._parser_state.error('missing filter')
498        filters = []
499        if tokens[0].type == 'LBRACE':
500            opening_brace = tokens.pop(0)
501            while tokens:
502                filters.append(self._parse_single_filter(tokens))
503                if not tokens or tokens[0].type != 'COMMA':
504                    break
505                tokens.pop(0)
506            if not tokens or tokens[0].type != 'RBRACE':
507                self._parser_state.error('unclosed brace', token=opening_brace)
508            tokens.pop(0)
509        else:
510            filters.append(self._parse_single_filter(tokens))
511        return filters
512
513    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
514    #                ;
515    def _parse_key_value_pair(self, tokens):
516        if not tokens:
517            self._parser_state.error('missing key')
518        key = tokens.pop(0)
519        if key.type != 'IDENTIFIER':
520            self._parser_state.error('invalid key', token=key)
521        if not tokens:
522            self._parser_state.error('missing equal')
523        if tokens[0].type != 'EQUAL':
524            self._parser_state.error('invalid equal', token=tokens[0])
525        tokens.pop(0)
526        value_list = []
527        while tokens:
528            value = tokens.pop(0)
529            if value.type != 'IDENTIFIER':
530                self._parser_state.error('invalid value', token=value)
531            value_list.append(value.value)
532            if not tokens or tokens[0].type != 'COMMA':
533                break
534            tokens.pop(0)
535        else:
536            self._parser_state.error('empty value')
537        return (key.value, value_list)
538
539    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
540    #          ;
541    def _parse_metadata(self, tokens):
542        if not tokens:
543            self._parser_state.error('missing opening bracket')
544        opening_bracket = tokens.pop(0)
545        if opening_bracket.type != 'LBRACKET':
546            self._parser_state.error(
547                'invalid opening bracket', token=opening_bracket)
548        metadata = {}
549        while tokens:
550            first_token = tokens[0]
551            key, value = self._parse_key_value_pair(tokens)
552            if key in metadata:
553                self._parser_state.error(
554                    'duplicate metadata key: "%s"' % key, token=first_token)
555            metadata[key] = value
556            if not tokens or tokens[0].type != 'SEMICOLON':
557                break
558            tokens.pop(0)
559        if not tokens or tokens[0].type != 'RBRACKET':
560            self._parser_state.error('unclosed bracket', token=opening_bracket)
561        tokens.pop(0)
562        return metadata
563
    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        # Returns an iterable of Syscall namedtuples: a one-element tuple for
        # a plain syscall name, a generator over the group members for a
        # 'name@namespace' group, or an empty tuple when 'arch' metadata
        # excludes the current architecture.
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` as a syscall name is a special case since kill is also a valid
        # action and actions have precedence over identifiers.
        if (syscall_descriptor.type != 'IDENTIFIER' and
            syscall_descriptor.value != 'kill'):
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            # An 'arch' key restricts the descriptor to the listed
            # architectures.
            if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group of the form 'group@namespace'.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            # Lazily expand the group into (name, number) pairs.
            return (Syscall(name, self._arch.syscalls[name])
                    for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )
604
    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None when every syscall descriptor was filtered out (e.g.
        restricted by metadata to other architectures).
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        # Given that there can be multiple syscalls and filters in a single
        # filter statement, use the colon token as the anchor for error location
        # purposes.
        colon_token = tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(
            tuple(syscall_descriptors), parsed_filter, colon_token)
640
641    # include-statement = '@include' , posix-path
642    #                   ;
643    def _parse_include_statement(self, tokens):
644        if not tokens:
645            self._parser_state.error('empty filter statement')
646        if tokens[0].type != 'INCLUDE':
647            self._parser_state.error('invalid include', token=tokens[0])
648        tokens.pop(0)
649        if not tokens:
650            self._parser_state.error('empty include path')
651        include_path = tokens.pop(0)
652        if include_path.type != 'PATH':
653            self._parser_state.error(
654                'invalid include path', token=include_path)
655        if len(self._parser_states) == self._include_depth_limit:
656            self._parser_state.error('@include statement nested too deep')
657        include_filename = os.path.normpath(
658            os.path.join(
659                os.path.dirname(self._parser_state.filename),
660                include_path.value))
661        if not os.path.isfile(include_filename):
662            self._parser_state.error(
663                'Could not @include %s' % include_filename, token=include_path)
664        return self._parse_policy_file(include_filename)
665
666    def _parse_frequency_file(self, filename):
667        self._parser_states.append(ParserState(filename))
668        try:
669            frequency_mapping = collections.defaultdict(int)
670            with open(filename) as frequency_file:
671                for tokens in self._parser_state.tokenize(frequency_file):
672                    syscall_numbers = self._parse_syscall_descriptor(tokens)
673                    if not tokens:
674                        self._parser_state.error('missing colon')
675                    if tokens[0].type != 'COLON':
676                        self._parser_state.error(
677                            'invalid colon', token=tokens[0])
678                    tokens.pop(0)
679
680                    if not tokens:
681                        self._parser_state.error('missing number')
682                    number = tokens.pop(0)
683                    if number.type != 'NUMERIC_CONSTANT':
684                        self._parser_state.error(
685                            'invalid number', token=number)
686                    number_value = int(number.value, base=0)
687                    if number_value < 0:
688                        self._parser_state.error(
689                            'invalid number', token=number)
690
691                    for syscall_number in syscall_numbers:
692                        frequency_mapping[syscall_number] += number_value
693            return frequency_mapping
694        finally:
695            self._parser_states.pop()
696
    # frequency-statement = '@frequency' , posix-path
    #                      ;
    def _parse_frequency_statement(self, tokens):
        """Parse '@frequency <path>' and return the parsed frequency mapping.

        The path is resolved relative to the directory of the file currently
        being parsed.
        """
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)
720
721    # default-statement = '@default' , default-action
722    #                   ;
723    def _parse_default_statement(self, tokens):
724        if not tokens:
725            self._parser_state.error('empty default statement')
726        if tokens[0].type != 'DEFAULT':
727            self._parser_state.error('invalid default', token=tokens[0])
728        tokens.pop(0)
729        if not tokens:
730            self._parser_state.error('empty action')
731        return self._parse_default_action(tokens)
732
733    def _parse_policy_file(self, filename):
734        self._parser_states.append(ParserState(filename))
735        try:
736            statements = []
737            denylist_header = False
738            with open(filename) as policy_file:
739                for tokens in self._parser_state.tokenize(policy_file):
740                    if tokens[0].type == 'INCLUDE':
741                        statements.extend(
742                            self._parse_include_statement(tokens))
743                    elif tokens[0].type == 'FREQUENCY':
744                        for syscall_number, frequency in self._parse_frequency_statement(
745                                tokens).items():
746                            self._frequency_mapping[
747                                syscall_number] += frequency
748                    elif tokens[0].type == 'DEFAULT':
749                        self._default_action = self._parse_default_statement(
750                            tokens)
751                    elif tokens[0].type == 'DENYLIST':
752                        tokens.pop()
753                        if not self._denylist:
754                            self._parser_state.error('policy is denylist, but '
755                                                     'flag --denylist not '
756                                                     'passed in.')
757                        else:
758                            denylist_header = True
759                    else:
760                        statement = self.parse_filter_statement(tokens)
761                        if statement is None:
762                            # If all the syscalls in the statement are for
763                            # another arch, skip the whole statement.
764                            continue
765                        statements.append(statement)
766
767                    if tokens:
768                        self._parser_state.error(
769                            'extra tokens', token=tokens[0])
770            if self._denylist and not denylist_header:
771                self._parser_state.error('policy must contain @denylist flag to'
772                                         ' be compiled with --denylist flag.')
773            return statements
774        finally:
775            self._parser_states.pop()
776
777    def parse_file(self, filename):
778        """Parse a file and return the list of FilterStatements."""
779        self._frequency_mapping = collections.defaultdict(int)
780        try:
781            statements = [x for x in self._parse_policy_file(filename)]
782        except RecursionError:
783            raise ParseException(
784                'recursion limit exceeded',
785                filename,
786                line=self._parser_states[-1].line)
787
788        # Collapse statements into a single syscall-to-filter-list, remembering
789        # the token for each filter for better diagnostics.
790        syscall_filter_mapping = {}
791        syscall_filter_definitions = {}
792        filter_statements = []
793        for syscalls, filters, token in statements:
794            for syscall in syscalls:
795                if syscall not in syscall_filter_mapping:
796                    filter_statements.append(
797                        FilterStatement(
798                            syscall, self._frequency_mapping.get(syscall, 1),
799                            []))
800                    syscall_filter_mapping[syscall] = filter_statements[-1]
801                    syscall_filter_definitions[syscall] = []
802                for filt in filters:
803                    syscall_filter_mapping[syscall].filters.append(filt)
804                    syscall_filter_definitions[syscall].append(token)
805        default_action = self._override_default_action or self._default_action
806        for filter_statement in filter_statements:
807            unconditional_actions_suffix = list(
808                itertools.dropwhile(lambda filt: filt.expression is not None,
809                                    filter_statement.filters))
810            if len(unconditional_actions_suffix) == 1:
811                # The last filter already has an unconditional action, no need
812                # to add another one.
813                continue
814            if len(unconditional_actions_suffix) > 1:
815                previous_definition_token = syscall_filter_definitions[
816                    filter_statement.syscall][
817                        -len(unconditional_actions_suffix)]
818                current_definition_token = syscall_filter_definitions[
819                    filter_statement.syscall][
820                        -len(unconditional_actions_suffix) + 1]
821                raise ParseException(
822                    ('Syscall %s (number %d) already had '
823                     'an unconditional action applied') %
824                    (filter_statement.syscall.name,
825                     filter_statement.syscall.number),
826                    filename=current_definition_token.filename,
827                    token=current_definition_token) from ParseException(
828                        'Previous definition',
829                        filename=previous_definition_token.filename,
830                        token=previous_definition_token)
831            assert not unconditional_actions_suffix
832            filter_statement.filters.append(
833                Filter(expression=None, action=default_action))
834        return ParsedPolicy(default_action, filter_statements)
835