#!/usr/bin/env python3
# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC
from dataclasses import dataclass
import re
import sys
from typing import Dict, List, Optional, Set, NamedTuple

from python.generators.sql_processing.docs_extractor import DocsExtractor
from python.generators.sql_processing.utils import ObjKind
from python.generators.sql_processing.utils import COLUMN_TYPES, MACRO_ARG_TYPES

from python.generators.sql_processing.utils import ALLOWED_PREFIXES
from python.generators.sql_processing.utils import OBJECT_NAME_ALLOWLIST

from python.generators.sql_processing.utils import ANY_PATTERN
from python.generators.sql_processing.utils import ARG_DEFINITION_PATTERN


def _is_internal(name: str) -> bool:
  """Returns True if `name` starts with an underscore."""
  return name.startswith('_')


def _is_snake_case(s: str) -> bool:
  """Returns True if `s` only has lowercase ASCII letters, digits and '_'.

  The empty string is accepted.
  """
  return re.fullmatch(r'^[a-z_0-9]*$', s) is not None


def parse_comment(comment: str) -> str:
  """Flattens a multi-line SQL comment into a single line of text.

  Each line is stripped of surrounding whitespace and of its leading dashes,
  then the lines are joined with single spaces; e.g. the two lines "-- Foo"
  and "-- bar." become "Foo bar.".
  """
  return ' '.join(line.strip().lstrip('--').lstrip()
                  for line in comment.strip().split('\n'))


def get_module_prefix_error(name: str, path: str, module: str) -> Optional[str]:
  """Returns error message if the name is not correct, None otherwise.

  Args:
    name: Name of the table/view/function being checked.
    path: Path of the SQL file; matched against the keys of ALLOWED_PREFIXES
      and OBJECT_NAME_ALLOWLIST.
    module: Module the object belongs to.
  """
  # In these modules the rule is inverted: the module name must NOT be used
  # as a prefix.
  if module in ["common", "prelude", "deprecated"]:
    if name.startswith(module):
      return (f'Names of tables/views/functions in the "{module}" module '
              f'should not start with {module}')
    return None

  if name.startswith(module):
    # Module prefix is always allowed.
    return None

  # Collect every acceptable prefix along the way so the final error message
  # can list them.
  allowed_prefixes = [module]
  for path_prefix, allowed_name_prefixes in ALLOWED_PREFIXES.items():
    if not path.startswith(path_prefix):
      continue
    if any(name.startswith(prefix) for prefix in allowed_name_prefixes):
      return None
    allowed_prefixes.extend(allowed_name_prefixes)

  if path in OBJECT_NAME_ALLOWLIST and name in OBJECT_NAME_ALLOWLIST[path]:
    return None

  return (
      f'Names of tables/views/functions at path "{path}" should be prefixed '
      f'with one of following names: {", ".join(allowed_prefixes)}')


class Arg(NamedTuple):
  # Short type used for validation: 'JOINID' / 'ID' for parametrised id types,
  # otherwise the same string as `long_type`.
  type: str
  # Type exactly as it was written in the SQL definition.
  long_type: str
  # Flattened comment preceding the definition ('' when there was none).
  description: str


class AbstractDocParser(ABC):
  """Shared validation helpers for the per-object documentation parsers.

  Problems are not raised: they are appended to `self.errors`, each prefixed
  with the object name and the file path.
  """

  @dataclass
  class Column:
    pass

  def __init__(self, path: str, module: str):
    self.path = path
    self.module = module
    # Set by the subclass `parse` method before any helper is used.
    self.name: Optional[str] = None
    self.errors: List[str] = []

  def _parse_name(self, upper: bool = False):
    """Checks the module prefix of `self.name` and returns the stripped name.

    `upper` is not used by this method.
    """
    assert self.name
    assert isinstance(self.name, str)
    module_prefix_error = get_module_prefix_error(self.name, self.path,
                                                  self.module)
    if module_prefix_error is not None:
      self._error(module_prefix_error)
    return self.name.strip()

  def _parse_desc_not_empty(self, desc: str):
    """Returns the stripped description, recording an error if it is empty."""
    if not desc:
      self._error('Description of the table/view/function/macro is missing')
      # Nothing to strip; also avoids calling .strip() on None.
      return ''
    return desc.strip()

  def _parse_columns(self, schema: str, kind: ObjKind) -> Dict[str, Arg]:
    """Parses and validates a column schema.

    Records an error for every column without a description (its type is then
    not checked) and for every column whose upper-cased type is not in
    COLUMN_TYPES. An empty/None schema yields an empty dict.
    """
    columns = self._parse_args_definition(schema) if schema else {}
    for column_name, properties in columns.items():
      if not properties.description:
        self._error(
            f'Column "{column_name}" is missing a description. Please add a '
            'comment in front of the column definition')
        continue

      upper_arg_type = properties.type.upper()
      if kind is ObjKind.table_function:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Table function column "{column_name}" has unsupported type "{properties.type}".'
          )
      elif kind is ObjKind.table_view:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Table/view column "{column_name}" has unsupported type "{properties.type}".'
          )
      else:
        self._error('This Perfetto SQL object does not support columns.')

    return columns

  def _parse_args(self, sql_args_str: str, kind: ObjKind) -> Dict[str, Arg]:
    """Parses and validates an argument list.

    Records an error for every argument without a description and for every
    argument whose upper-cased type is not allowed for `kind` (COLUMN_TYPES
    for functions and table functions, MACRO_ARG_TYPES for macros).
    """
    args = self._parse_args_definition(sql_args_str)

    for arg_name, arg in args.items():
      if not arg.description:
        self._error(f'Arg "{arg_name}" is missing a description. '
                    'Please add a comment in front of the arg definition.')

      upper_arg_type = arg.type.upper()
      if kind is ObjKind.function or kind is ObjKind.table_function:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Function arg "{arg_name}" has unsupported type "{arg.type}".')
      elif kind is ObjKind.macro:
        if upper_arg_type not in MACRO_ARG_TYPES:
          self._error(
              f'Macro arg "{arg_name}" has unsupported type "{arg.type}".')
      else:
        self._error('This Perfetto SQL object does not support types.')

    return args

  # Parse function argument definition list or a table schema, e.g.
  # arg1 INT, arg2 STRING, including their comments.
  def _parse_args_definition(self, args_str: str) -> Dict[str, Arg]:
    """Splits `args_str` into a name -> Arg mapping.

    Definitions are consumed one at a time from the front of the string. If
    the remaining text does not match ARG_DEFINITION_PATTERN an error is
    recorded and the definitions parsed so far are returned.
    """
    result = {}
    remaining_args = args_str.strip()
    while remaining_args:
      m = re.match(fr'^{ARG_DEFINITION_PATTERN}({ANY_PATTERN})', remaining_args)
      if not m:
        self._error(f'Expected "{args_str}" to correspond to '
                    '"-- Comment\n arg_name TYPE" format '
                    f'({ARG_DEFINITION_PATTERN})')
        return result

      groups = m.groups()
      comment = '' if groups[0] is None else parse_comment(groups[0])
      # The last three groups are: name, type, and the unparsed remainder.
      name = groups[-3]
      long_type = groups[-2]

      # Parametrised id types are validated by their short name only.
      if re.match(r'JOINID\(([_A-Za-z\.]*)\)', long_type):
        short_type = 'JOINID'
      elif re.match(r'ID\(([_A-Za-z\.]*)\)', long_type):
        short_type = 'ID'
      else:
        short_type = long_type

      result[name] = Arg(short_type, long_type, comment)
      # Strip whitespace and comma and parse the next arg.
      remaining_args = groups[-1].lstrip().lstrip(',').lstrip()

    return result

  def _error_create_or_replace(self, kind_label: str):
    """Records the error reported for a CREATE OR REPLACE statement."""
    self._error(
        f'{kind_label} "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
        f'as standard library modules can only included once. Please just '
        f'use CREATE instead.')

  def _error(self, error: str):
    """Appends `error` to `self.errors`, prefixed with object name and path."""
    self.errors.append(
        f'Error while parsing documentation for "{self.name}" in {self.path}: '
        f'{error}')


class TableOrView:
  name: str
  type: str
  desc: str
  cols: Dict[str, Arg]

  def __init__(self, name, type, desc, cols):
    self.name = name
    self.type = type
    self.desc = desc
    self.cols = cols


class TableViewDocParser(AbstractDocParser):
  """Parses documentation for CREATE TABLE and CREATE VIEW statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableOrView]:
    """Validates one table/view extract.

    Returns None for internal (underscore-prefixed) objects and whenever one
    of the structural checks below fails; failures are recorded in
    `self.errors`.
    """
    assert doc.obj_kind == ObjKind.table_view

    or_replace, perfetto_or_virtual, obj_type, self.name, schema = doc.obj_match

    if or_replace is not None:
      self._error_create_or_replace(obj_type)
      return None

    if _is_internal(self.name):
      return None

    # "window" is exempt from the schema and virtual-table checks.
    is_window = self.name.lower() == "window"
    is_table = obj_type.lower() == "table"

    if not schema and not is_window:
      self._error(
          f'{obj_type} "{self.name}": schema is missing for a non-internal stdlib'
          f' perfetto table or view')
      return None

    if is_table and not perfetto_or_virtual:
      self._error(
          f'{obj_type} "{self.name}": Can only expose CREATE PERFETTO tables')
      return None

    is_virtual_table = is_table and perfetto_or_virtual.lower() == "virtual"
    if is_virtual_table and not is_window:
      self._error(
          f'{obj_type} "{self.name}": Virtual tables cannot be exposed.')
      return None

    # Parse the schema exactly once: every call appends its validation errors
    # to `self.errors`, so a second call would duplicate them.
    cols = self._parse_columns(schema, ObjKind.table_view)

    return TableOrView(
        name=self._parse_name(),
        type=obj_type,
        desc=self._parse_desc_not_empty(doc.description),
        cols=cols)


class Function:
  name: str
  desc: str
  args: Dict[str, Arg]
  return_type: str
  return_desc: str

  def __init__(self, name, desc, args, return_type, return_desc):
    self.name = name
    self.desc = desc
    self.args = args
    self.return_type = return_type
    self.return_desc = return_desc


class FunctionDocParser(AbstractDocParser):
  """Parses documentation for CREATE_FUNCTION statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Function]:
    """Validates one function extract.

    Returns None for internal (underscore-prefixed) functions. All other
    problems are recorded in `self.errors` and a Function is still returned.
    """
    or_replace, self.name, args, ret_comment, ret_type = doc.obj_match

    # Parsing continues after this error is recorded.
    if or_replace is not None:
      self._error_create_or_replace('Function')

    # Ignore internal functions.
    if _is_internal(self.name):
      return None

    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be {name.casefold()})')

    ret_desc = None if ret_comment is None else parse_comment(ret_comment)
    if not ret_desc:
      self._error(f'Function "{name}": return description is missing')

    return Function(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        args=self._parse_args(args, ObjKind.function),
        return_type=ret_type,
        return_desc=ret_desc,
    )


class TableFunction:
  name: str
  desc: str
  cols: Dict[str, Arg]
  args: Dict[str, Arg]

  def __init__(self, name, desc, cols, args):
    self.name = name
    self.desc = desc
    self.cols = cols
    self.args = args


class TableFunctionDocParser(AbstractDocParser):
  """Parses documentation for table function statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableFunction]:
    """Validates one table function extract.

    Returns None for CREATE OR REPLACE statements (after recording an error)
    and for internal (underscore-prefixed) functions.
    """
    or_replace, self.name, args, ret_comment, columns = doc.obj_match

    if or_replace is not None:
      self._error_create_or_replace('Function')
      return None

    # Ignore internal functions.
    if _is_internal(self.name):
      return None

    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    return TableFunction(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        cols=self._parse_columns(columns, ObjKind.table_function),
        args=self._parse_args(args, ObjKind.table_function),
    )


class Macro:
  name: str
  desc: str
  return_desc: str
  return_type: str
  args: Dict[str, Arg]

  def __init__(self, name: str, desc: str, return_desc: str, return_type: str,
               args: Dict[str, Arg]):
    self.name = name
    self.desc = desc
    self.return_desc = return_desc
    self.return_type = return_type
    self.args = args


class MacroDocParser(AbstractDocParser):
  """Parses documentation for macro statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Macro]:
    """Validates one macro extract.

    Returns None for internal (underscore-prefixed) macros. All other
    problems are recorded in `self.errors` and a Macro is still returned.
    """
    or_replace, self.name, args, return_desc, return_type = doc.obj_match

    # Parsing continues after this error is recorded.
    if or_replace is not None:
      self._error_create_or_replace('Macro')

    # Ignore internal macros.
    if _is_internal(self.name):
      return None

    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Macro name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    # Guard against a missing return comment, as FunctionDocParser does.
    parsed_return_desc = ('' if return_desc is None else
                          parse_comment(return_desc))

    return Macro(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        return_desc=parsed_return_desc,
        return_type=return_type,
        args=self._parse_args(args, ObjKind.macro),
    )


class Include:
  package: str
  module: str
  module_as_list: List[str]

  def __init__(self, package: str, module: str, module_as_list: List[str]):
    self.package = package
    self.module = module
    self.module_as_list = module_as_list


class IncludeParser(AbstractDocParser):
  """Parses the includes of module."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Include]:
    """Builds an Include from the first element of `doc.obj_match`.

    Note that this overwrites `self.module` with the included module name.
    """
    self.module = list(doc.obj_match)[0]
    module_as_list = self.module.split('.')

    return Include(
        package=module_as_list[0],
        module=self.module,
        module_as_list=module_as_list,
    )


class ParsedModule:
  """Data class containing all of the documentation of single SQL file"""
  package_name: str = ""
  module_as_list: List[str]
  module: str
  errors: List[str] = []
  table_views: List[TableOrView] = []
  functions: List[Function] = []
  table_functions: List[TableFunction] = []
  macros: List[Macro] = []
  includes: List[Include]

  def __init__(self, package_name: str, module_as_list: List[str],
               errors: List[str], table_views: List[TableOrView],
               functions: List[Function], table_functions: List[TableFunction],
               macros: List[Macro], includes: List[Include]):
    self.package_name = package_name
    self.module_as_list = module_as_list
    # Dotted module name, e.g. ['a', 'b'] -> 'a.b'.
    self.module = ".".join(module_as_list)
    self.errors = errors
    self.table_views = table_views
    self.functions = functions
    self.table_functions = table_functions
    self.macros = macros
    self.includes = includes


def parse_file(path: str, sql: str) -> Optional[ParsedModule]:
  """Parses the documentation of one stdlib SQL file.

  Args:
    path: Path of the SQL file. The module name is derived from the part
      after the last '/stdlib/' with the '.sql' suffix removed.
    sql: Contents of the file.

  Returns:
    A ParsedModule holding the parsed objects together with all validation
    errors, or None for files of the `deprecated` package. If the extractor
    itself reports errors, the ParsedModule carries only those errors.
  """
  if sys.platform.startswith('win'):
    path = path.replace('\\', '/')

  module_as_list: List[str] = path.split('/stdlib/')[-1].split(".sql")[0].split(
      '/')

  # Get package name
  package_name = module_as_list[0]

  # Disable support for `deprecated` package
  if package_name == "deprecated":
    return None

  # Extract all the docs from the SQL.
  extractor = DocsExtractor(path, package_name, sql)
  docs = extractor.extract()
  if extractor.errors:
    return ParsedModule(package_name, module_as_list, extractor.errors, [], [],
                        [], [], [])

  # Parse the extracted docs.
  errors: List[str] = []
  table_views: List[TableOrView] = []
  functions: List[Function] = []
  table_functions: List[TableFunction] = []
  macros: List[Macro] = []
  includes: List[Include] = []

  # (object kind, parser class, list collecting that kind's results)
  handlers = (
      (ObjKind.table_view, TableViewDocParser, table_views),
      (ObjKind.function, FunctionDocParser, functions),
      (ObjKind.table_function, TableFunctionDocParser, table_functions),
      (ObjKind.macro, MacroDocParser, macros),
      (ObjKind.include, IncludeParser, includes),
  )
  for doc in docs:
    for kind, parser_cls, results in handlers:
      if doc.obj_kind != kind:
        continue
      # A fresh parser per object keeps its error list scoped to that object.
      parser = parser_cls(path, package_name)
      res = parser.parse(doc)
      if res:
        results.append(res)
      errors += parser.errors
      break

  return ParsedModule(package_name, module_as_list, errors, table_views,
                      functions, table_functions, macros, includes)