1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3# 4# Copyright (C) 2018 The Android Open Source Project 5# 6# Licensed under the Apache License, Version 2.0 (the "License"); 7# you may not use this file except in compliance with the License. 8# You may obtain a copy of the License at 9# 10# http://www.apache.org/licenses/LICENSE-2.0 11# 12# Unless required by applicable law or agreed to in writing, software 13# distributed under the License is distributed on an "AS IS" BASIS, 14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15# See the License for the specific language governing permissions and 16# limitations under the License. 17"""A parser for the Minijail policy file.""" 18 19from __future__ import absolute_import 20from __future__ import division 21from __future__ import print_function 22 23import collections 24import itertools 25import os.path 26import re 27 28try: 29 import bpf 30except ImportError: 31 from minijail import bpf 32 33 34# Representations of numbers with different radix (base) in C. 35HEX_REGEX = r'-?0[xX][0-9a-fA-F]+' 36OCTAL_REGEX = r'-?0[0-7]+' 37DECIMAL_REGEX = r'-?[0-9]+' 38 39 40Token = collections.namedtuple( 41 'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column']) 42 43# A regex that can tokenize a Minijail policy file line. 
# Token kinds paired with their patterns. Order matters: the alternation
# below tries groups left to right, so longer/more specific patterns must
# come first (e.g. 'OR' (`||`) before 'BITWISE_OR' (`|`), and keyword-like
# 'ACTION'/'RETURN'/'ARGUMENT' before the catch-all 'IDENTIFIER').
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default\b'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    ('DENYLIST', r'@denylist$'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', f'{HEX_REGEX}|{OCTAL_REGEX}|{DECIMAL_REGEX}'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b|\buser-notify\b'
    ),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)
# One named group per token kind; match.lastgroup tells us which kind fired.
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))


class ParseException(Exception):
    """An exception that is raised when parsing fails.

    The message is decorated with the file name, line number, column, the
    offending line's text, and a caret marker pointing at the bad span.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        # When a token is available it carries authoritative position info;
        # the explicit line/line_number keyword arguments are ignored then.
        if token:
            line = token.line
            line_number = token.line_number
            column = token.column
            length = len(token.value)
        else:
            # No token: point one column past the end of the provided line.
            column = len(line)
            length = 1

        message = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
                                       message)
        message += '\n    %s' % line
        # Caret line: pad to the error column, then underline the token.
        message += '\n    %s%s' % (' ' * column, '^' * length)
        super().__init__(message)


class ParserState:
    """Stores the state of the Parser to provide better diagnostics."""

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParserException with the provided message."""
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Return a list of tokens for the current line.

        This is a generator: it yields one token list per logical line,
        where lines ending in a backslash continue into the next physical
        line. Whitespace, comments, and continuation markers are dropped.
        Raises (via self.error) on any text the token regex cannot match.
        """
        tokens = []

        for line_number, line in enumerate(lines):
            self._line_number = line_number + 1
            self._line = line.rstrip('\r\n')

            # `last_end` tracks where the previous match stopped; any gap
            # between matches is unmatchable text and therefore an error.
            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                if token.start() != last_end:
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:token.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this logic
                # elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            # Trailing unmatchable text after the last match is also an error.
            if last_end != len(self._line):
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be free
                # to modify it.
                yield tokens[::]
            tokens.clear()


Atom = collections.namedtuple('Atom', ['argument_index', 'op', 'value'])
"""A single boolean comparison within a filter expression."""

Filter = collections.namedtuple('Filter', ['expression', 'action'])
"""The result of parsing a DNF filter expression, with its action.

Since the expression is in Disjunctive Normal Form, it is composed of two levels
of lists, one for disjunctions and the inner one for conjunctions. The elements
of the inner list are Atoms.
"""

Syscall = collections.namedtuple('Syscall', ['name', 'number'])
"""A system call."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ['syscalls', 'filters', 'token'])
"""The result of parsing a filter statement.

Statements have a list of syscalls, and an associated list of filters that will
be evaluated sequentially when any of the syscalls is invoked.
"""

FilterStatement = collections.namedtuple('FilterStatement',
                                         ['syscall', 'frequency', 'filters'])
"""The filter list for a particular syscall.

This is a mapping from one syscall to a list of filters that are evaluated
sequentially. The last filter is always an unconditional action.
"""
ParsedPolicy = collections.namedtuple('ParsedPolicy',
                                      ['default_action', 'filter_statements'])
"""The result of parsing a minijail .policy file."""


# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    This is a hand-written recursive-descent parser. Each _parse_* method
    consumes tokens from the front of the shared token list (via pop(0)) and
    reports errors through the current ParserState so diagnostics carry
    file/line/column information. The grammar for each production is given
    in EBNF comments above the corresponding method.
    """

    def __init__(self,
                 arch,
                 *,
                 kill_action,
                 include_depth_limit=10,
                 override_default_action=None,
                 denylist=False,
                 ret_log=False):
        # A stack of ParserStates, one per file being parsed; @include pushes
        # a new state and pops it when done, so errors point at the right file.
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        # Denylist policies allow everything by default and deny (return an
        # errno for) the listed syscalls.
        if denylist:
            self._default_action = bpf.Allow()
        else:
            self._default_action = self._kill_action
        self._override_default_action = override_default_action
        self._frequency_mapping = collections.defaultdict(int)
        self._arch = arch
        self._denylist = denylist
        self._ret_log = ret_log

    @property
    def _parser_state(self):
        # The state of the innermost file currently being parsed.
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Parse one constant token into its integer value."""
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            # As `int(_, 0)` in Python != `strtol(_, _, 0)` in C, to make sure
            # the number parsing behaves exactly in C, instead of using `int()`
            # directly, we list out all the possible formats for octal, decimal
            # and hex numbers, and determine the corresponding base by regex.
            try:
                if re.match(HEX_REGEX, token.value):
                    base = 16
                elif re.match(OCTAL_REGEX, token.value):
                    base = 8
                elif re.match(DECIMAL_REGEX, token.value):
                    base = 10
                else:
                    # This should never happen, since the tokenizer only
                    # produces NUMERIC_CONSTANT for one of these patterns.
                    raise ValueError
                single_constant = int(token.value, base=base)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse a (possibly complemented, possibly parenthesized) constant."""
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        tokens.pop(0)
        if negate:
            # Complement within the arch's word size, not Python's bignum.
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with strtol() in C.
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Only reached when the token list was empty to begin with, or
            # was exhausted right after a '|'.
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse one `argN op value` comparison."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # Strip the 'arg' prefix to get the numeric index.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse a conjunction ('&&') of atoms; returns the list of Atoms."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse a argument expression in Disjunctive Normal Form.

        Since BPF disallows back jumps, we build the basic blocks in reverse
        order so that all the jump targets are known by the time we need to
        reference them.
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # default-action = 'kill-process'
    #                | 'kill-thread'
    #                | 'kill'
    #                | 'trap'
    #                | 'user-notify'
    #                ;
    def _parse_default_action(self, tokens):
        """Parse the action of an @default statement.

        Unlike per-syscall actions, permissive actions ('allow', 'trace',
        'log') are rejected here: a default must not silently allow.
        """
        if not tokens:
            self._parser_state.error('missing default action')
        action_token = tokens.pop(0)
        if action_token.type != 'ACTION':
            return self._parser_state.error(
                'invalid default action', token=action_token)
        if action_token.value == 'kill-process':
            return bpf.KillProcess()
        if action_token.value == 'kill-thread':
            return bpf.KillThread()
        if action_token.value == 'kill':
            return self._kill_action
        if action_token.value == 'trap':
            return bpf.Trap()
        if action_token.value == 'user-notify':
            return bpf.UserNotify()
        return self._parser_state.error(
            'invalid permissive default action', token=action_token)

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'user-notify'
    #        | 'return' , single-constant
    #        ;
    def parse_action(self, tokens):
        """Parse an action and return the corresponding bpf action object."""
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        # denylist policies must specify a return for every line.
        if self._denylist:
            if action_token.type != 'RETURN':
                self._parser_state.error('invalid denylist policy')

        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'user-notify':
                return bpf.UserNotify()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            # Syscall-table compatibility: a literal 1 means allow.
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            if self._ret_log:
                # --ret-log turns every errno return into a log action; the
                # errno token is still consumed so parsing stays in sync.
                tokens.pop(0)
                return bpf.Log()
            else:
                return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               | '!','(', argument-expression, [ ';', action ], ')'
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter: a bare action, or an expression with an action."""
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            # Only argument expressions can start with an ARGUMENT token.
            argument_expression = self.parse_argument_expression(tokens)
            if tokens and tokens[0].type == 'SEMICOLON':
                tokens.pop(0)
                action = self.parse_action(tokens)
            else:
                # An expression with no explicit action defaults to allow.
                action = bpf.Allow()
            return Filter(argument_expression, action)
        else:
            # expression=None marks an unconditional filter.
            return Filter(None, self.parse_action(tokens))

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
    #                ;
    def _parse_key_value_pair(self, tokens):
        """Parse `key=value[,value...]`; returns (key, [values])."""
        if not tokens:
            self._parser_state.error('missing key')
        key = tokens.pop(0)
        if key.type != 'IDENTIFIER':
            self._parser_state.error('invalid key', token=key)
        if not tokens:
            self._parser_state.error('missing equal')
        if tokens[0].type != 'EQUAL':
            self._parser_state.error('invalid equal', token=tokens[0])
        tokens.pop(0)
        value_list = []
        while tokens:
            value = tokens.pop(0)
            if value.type != 'IDENTIFIER':
                self._parser_state.error('invalid value', token=value)
            value_list.append(value.value)
            if not tokens or tokens[0].type != 'COMMA':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty value')
        return (key.value, value_list)

    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
    #          ;
    def _parse_metadata(self, tokens):
        """Parse bracketed metadata; returns a dict of key -> value list."""
        if not tokens:
            self._parser_state.error('missing opening bracket')
        opening_bracket = tokens.pop(0)
        if opening_bracket.type != 'LBRACKET':
            self._parser_state.error(
                'invalid opening bracket', token=opening_bracket)
        metadata = {}
        while tokens:
            first_token = tokens[0]
            key, value = self._parse_key_value_pair(tokens)
            if key in metadata:
                self._parser_state.error(
                    'duplicate metadata key: "%s"' % key, token=first_token)
            metadata[key] = value
            if not tokens or tokens[0].type != 'SEMICOLON':
                break
            tokens.pop(0)
        if not tokens or tokens[0].type != 'RBRACKET':
            self._parser_state.error('unclosed bracket', token=opening_bracket)
        tokens.pop(0)
        return metadata

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse one syscall (or group) descriptor.

        Returns an iterable of Syscall namedtuples; empty when the
        descriptor's metadata restricts it to a different architecture.
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` as a syscall name is a special case since kill is also a valid
        # action and actions have precedence over identifiers.
        if (syscall_descriptor.type != 'IDENTIFIER' and
            syscall_descriptor.value != 'kill'):
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            # An 'arch' key acts as a per-descriptor filter: skip this
            # descriptor entirely if the current arch is not listed.
            if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group: `group@namespace`.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            return (Syscall(name, self._arch.syscalls[name])
                    for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None when every syscall in the statement was filtered out
        for another architecture.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        # Given that there can be multiple syscalls and filters in a single
        # filter statement, use the colon token as the anchor for error location
        # purposes.
        colon_token = tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(
            tuple(syscall_descriptors), parsed_filter, colon_token)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse an @include and return the included file's statements."""
        if not tokens:
            self._parser_state.error('empty filter statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        # One ParserState per open file, so the stack depth is the nesting
        # depth of @include statements.
        if len(self._parser_states) == self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        # Relative paths are resolved against the including file's directory.
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_frequency_file(self, filename):
        """Parse a `syscall: count` frequency file into a mapping."""
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename) as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    # Despite the name, these are Syscall namedtuples.
                    syscall_numbers = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    # NOTE(review): int(_, base=0) follows Python literal
                    # rules, unlike the C-strtol-style regex-based base
                    # detection in _parse_single_constant; a C-style octal
                    # like `010` raises an uncaught ValueError here — confirm
                    # whether frequency files are meant to be decimal-only.
                    number_value = int(number.value, base=0)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    for syscall_number in syscall_numbers:
                        frequency_mapping[syscall_number] += number_value
            return frequency_mapping
        finally:
            self._parser_states.pop()

    # frequency-statement = '@frequency' , posix-path
    #                      ;
    def _parse_frequency_statement(self, tokens):
        """Parse an @frequency statement and load the referenced file."""
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)

    # default-statement = '@default' , default-action
    #                    ;
    def _parse_default_statement(self, tokens):
        """Parse an @default statement and return its action."""
        if not tokens:
            self._parser_state.error('empty default statement')
        if tokens[0].type != 'DEFAULT':
            self._parser_state.error('invalid default', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty action')
        return self._parse_default_action(tokens)

    def _parse_policy_file(self, filename):
        """Parse one policy file; returns its list of ParsedFilterStatements.

        Directive statements (@include/@frequency/@default/@denylist) are
        handled here as side effects on the parser's state.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            denylist_header = False
            with open(filename) as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        for syscall_number, frequency in self._parse_frequency_statement(
                                tokens).items():
                            self._frequency_mapping[
                                syscall_number] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    elif tokens[0].type == 'DENYLIST':
                        # NOTE(review): pop() removes the LAST token rather
                        # than the first; equivalent only because @denylist
                        # must be the sole token on its line — confirm, or
                        # use pop(0) for consistency with the other branches.
                        tokens.pop()
                        if not self._denylist:
                            self._parser_state.error('policy is denylist, but '
                                                     'flag --denylist not '
                                                     'passed in.')
                        else:
                            denylist_header = True
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    # Any tokens a statement parser left behind are junk.
                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            if self._denylist and not denylist_header:
                self._parser_state.error('policy must contain @denylist flag to'
                                         ' be compiled with --denylist flag.')
            return statements
        finally:
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a file and return the list of FilterStatements."""
        self._frequency_mapping = collections.defaultdict(int)
        try:
            statements = [x for x in self._parse_policy_file(filename)]
        except RecursionError:
            raise ParseException(
                'recursion limit exceeded',
                filename,
                line=self._parser_states[-1].line)

        # Collapse statements into a single syscall-to-filter-list, remembering
        # the token for each filter for better diagnostics.
        syscall_filter_mapping = {}
        syscall_filter_definitions = {}
        filter_statements = []
        for syscalls, filters, token in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(
                        FilterStatement(
                            syscall, self._frequency_mapping.get(syscall, 1),
                            []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                    syscall_filter_definitions[syscall] = []
                for filt in filters:
                    syscall_filter_mapping[syscall].filters.append(filt)
                    syscall_filter_definitions[syscall].append(token)
        default_action = self._override_default_action or self._default_action
        # Ensure each syscall's filter list ends in exactly one unconditional
        # action (expression is None); append the default when missing, and
        # reject duplicates with a chained exception pointing at both
        # definitions.
        for filter_statement in filter_statements:
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                previous_definition_token = syscall_filter_definitions[
                    filter_statement.syscall][
                        -len(unconditional_actions_suffix)]
                current_definition_token = syscall_filter_definitions[
                    filter_statement.syscall][
                        -len(unconditional_actions_suffix) + 1]
                raise ParseException(
                    ('Syscall %s (number %d) already had '
                     'an unconditional action applied') %
                    (filter_statement.syscall.name,
                     filter_statement.syscall.number),
                    filename=current_definition_token.filename,
                    token=current_definition_token) from ParseException(
                        'Previous definition',
                        filename=previous_definition_token.filename,
                        token=previous_definition_token)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=default_action))
        return ParsedPolicy(default_action, filter_statements)