#!/usr/bin/env python3
# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC
from dataclasses import dataclass
import re
import sys
from typing import Dict, List, Optional, Set, NamedTuple

from python.generators.sql_processing.docs_extractor import DocsExtractor
from python.generators.sql_processing.utils import ObjKind

from python.generators.sql_processing.utils import ALLOWED_PREFIXES
from python.generators.sql_processing.utils import OBJECT_NAME_ALLOWLIST

from python.generators.sql_processing.utils import COLUMN_ANNOTATION_PATTERN
from python.generators.sql_processing.utils import ANY_PATTERN
from python.generators.sql_processing.utils import ARG_DEFINITION_PATTERN
from python.generators.sql_processing.utils import ARG_ANNOTATION_PATTERN


def _is_internal(name: str) -> bool:
  """Returns True if |name| starts with an underscore."""
  return name.startswith('_')


def _is_snake_case(s: str) -> bool:
  """Returns True if |s| only has lowercase ASCII letters, digits and '_'.

  The empty string is accepted as well.
  """
  return re.fullmatch(r'[a-z_0-9]*', s) is not None


def parse_comment(comment: str) -> str:
  r"""Flattens a SQL comment block into a single line of text.

  For example "-- Foo\n -- bar." becomes "Foo bar.". Every line is stripped of
  surrounding whitespace, then of all of its leading dashes (any number of
  them, not just two), then of the whitespace following the dashes. The
  resulting pieces are joined with single spaces.
  """
  pieces = []
  for line in comment.strip().split('\n'):
    pieces.append(line.strip().lstrip('-').lstrip())
  return ' '.join(pieces)


def get_module_prefix_error(name: str, path: str, module: str) -> Optional[str]:
  """Checks that |name| carries a prefix permitted for |module| and |path|.

  The prefix is the lowercased text before the first underscore of |name|.

  Returns:
    An error message if the prefix is not acceptable, None otherwise.
  """
  prefix = name.lower().split('_')[0]

  # These modules are the exception: their objects must NOT be prefixed with
  # the module name, and no other prefix rule applies to them.
  if module in ('common', 'prelude', 'deprecated'):
    if prefix != module:
      return None
    return (f'Names of tables/views/functions in the "{module}" module '
            f'should not start with {module}')

  # Module prefix is always allowed.
  if prefix == module:
    return None

  # Collect the extra prefixes allowed for this path, accepting the name as
  # soon as one of them matches.
  allowed_prefixes = [module]
  for path_prefix, allowed_name_prefix in ALLOWED_PREFIXES.items():
    if not path.startswith(path_prefix):
      continue
    if prefix == allowed_name_prefix:
      return None
    allowed_prefixes.append(allowed_name_prefix)

  # Explicitly allowlisted object names are accepted regardless of prefix.
  if path in OBJECT_NAME_ALLOWLIST and name in OBJECT_NAME_ALLOWLIST[path]:
    return None

  return (
      f'Names of tables/views/functions at path "{path}" should be prefixed '
      f'with one of following names: {", ".join(allowed_prefixes)}')


class Arg(NamedTuple):
  """Type and description of a single argument or column."""
  # TODO(b/307926059): the type is missing on old-style documentation for
  # tables. Make it "str" after stdlib is migrated.
  type: Optional[str]
  description: str


class AbstractDocParser(ABC):
  """Shared validation and parsing helpers for the concrete doc parsers.

  Problems found while parsing are not raised: they are appended to
  |self.errors| as human readable messages (see |_error|).
  """

  @dataclass
  class Column:
    pass

  def __init__(self, path: str, module: str):
    self.path = path
    self.module = module
    # Set by the subclasses' parse() before any helper below is used.
    self.name = None
    self.errors = []

  def _parse_name(self, upper: bool = False):
    """Validates the module prefix of |self.name| and returns it stripped.

    |upper| is not used by this implementation; it is kept so that existing
    callers passing it keep working.
    """
    assert self.name
    assert isinstance(self.name, str)
    prefix_error = get_module_prefix_error(self.name, self.path, self.module)
    if prefix_error is not None:
      self._error(prefix_error)
    return self.name.strip()

  def _parse_desc_not_empty(self, desc: str):
    """Records an error if |desc| is empty and returns |desc| stripped."""
    if not desc:
      self._error('Description of the table/view/function/macro is missing')
    return desc.strip()

  def _validate_only_contains_annotations(self,
                                          ans: List[DocsExtractor.Annotation],
                                          ans_types: Set[str]):
    """Records an error for each annotation key in |ans| not in |ans_types|."""
    used_ans_types = set(a.key for a in ans)
    for unknown_type in used_ans_types.difference(ans_types):
      self._error(f'Unknown documentation annotation {unknown_type}')

  def _parse_columns(self, ans: List[DocsExtractor.Annotation],
                     schema: Optional[str]) -> Dict[str, Arg]:
    """Merges old-style '@column' annotations with the inline schema docs.

    Args:
      ans: Annotations of the object; only '@column' ones are considered.
      schema: Textual column definitions, or None/empty if there are none.

    Returns:
      Column name -> Arg. Without a schema this is built purely from the
      annotations (with a None type); otherwise it is built from the schema,
      with descriptions filled in from annotations where the schema has no
      inline comment.
    """
    column_annotations = {}
    for annotation in ans:
      if annotation.key != '@column':
        continue
      m = re.match(COLUMN_ANNOTATION_PATTERN, annotation.value)
      if not m:
        self._error(
            f'@column annotation value {annotation.value} does not match '
            f'pattern {COLUMN_ANNOTATION_PATTERN}')
        continue
      column_annotations[m.group(1)] = Arg(None, m.group(2).strip())

    if not schema:
      # If we don't have schema, we have to accept annotations as the source of
      # truth.
      return column_annotations

    columns = self._parse_args_definition(schema)

    # Iterate over a snapshot of the keys: entries of |columns| are replaced
    # inside the loop.
    for column in list(columns):
      inline_comment = columns[column].description
      has_annotation = column in column_annotations

      if not inline_comment and not has_annotation:
        self._error(f'Column "{column}" is missing a description. Please add a '
                    'comment in front of the column definition')
        continue
      if not has_annotation:
        continue

      annotation_desc = column_annotations[column].description
      if not annotation_desc:
        continue
      if inline_comment:
        self._error(f'Column "{column}" is documented twice. Please remove the '
                    '@column annotation')
      else:
        # Absorb old-style annotations.
        columns[column] = Arg(columns[column].type, annotation_desc)

    # Check that the annotations match existing columns.
    for annotated_column in column_annotations:
      if annotated_column not in columns:
        self._error(
            f'Column "{annotated_column}" is documented but does not exist '
            'in table definition')
    return columns

  def _parse_args(self, ans: List[DocsExtractor.Annotation],
                  sql_args_str: str) -> Dict[str, Arg]:
    """Merges old-style '@arg' annotations with the inline argument docs.

    Args:
      ans: Annotations of the object; only '@arg' ones are considered.
      sql_args_str: Textual argument definitions from the SQL statement.

    Returns:
      Argument name -> Arg, built from |sql_args_str|, with descriptions
      filled in from annotations where there is no inline comment.
    """
    args = self._parse_args_definition(sql_args_str)

    arg_annotations = {}
    for annotation in ans:
      if annotation.key != '@arg':
        continue
      m = re.match(ARG_ANNOTATION_PATTERN, annotation.value)
      if m is None:
        self._error(
            f'Expected arg documentation "{annotation.value}" to match pattern '
            f'{ARG_ANNOTATION_PATTERN}')
        continue
      arg_annotations[m.group(1)] = Arg(m.group(2), m.group(3).strip())

    # Iterate over a snapshot of the keys: entries of |args| are replaced
    # inside the loop.
    for arg in list(args):
      has_inline_desc = bool(args[arg].description)
      has_annotation = arg in arg_annotations

      if not has_inline_desc and not has_annotation:
        self._error(f'Arg "{arg}" is missing a description. '
                    'Please add a comment in front of the arg definition.')
      elif has_inline_desc and has_annotation:
        self._error(f'Arg "{arg}" is documented twice. '
                    'Please remove the @arg annotation')
      elif has_annotation:
        # Absorb old-style annotations.
        # TODO(b/307926059): Remove it once stdlib is migrated.
        args[arg] = Arg(args[arg].type, arg_annotations[arg].description)

    for arg in arg_annotations:
      if arg not in args:
        self._error(
            f'Arg "{arg}" is documented but not found in function definition.')
    return args

  # Parse function argument definition list or a table schema, e.g.
  # arg1 INT, arg2 STRING, including their comments.
  def _parse_args_definition(self, args_str: str) -> Dict[str, Arg]:
    """Parses |args_str| into a mapping of name -> Arg(type, comment).

    Definitions are consumed one at a time from the front of the string. On
    the first chunk which does not match, an error is recorded and whatever
    has been parsed so far is returned.
    """
    result = {}
    remaining_args = args_str.strip()
    while remaining_args:
      m = re.match(fr'^{ARG_DEFINITION_PATTERN}({ANY_PATTERN})', remaining_args)
      if not m:
        # Every fragment which interpolates a value needs the f-prefix; the
        # last one used to be a plain string and printed the placeholder
        # verbatim instead of the pattern.
        self._error(f'Expected "{args_str}" to correspond to '
                    '"-- Comment\n arg_name TYPE" format '
                    f'({ARG_DEFINITION_PATTERN})')
        return result
      groups = m.groups()
      # The first group is the (optional) comment; the last three groups are
      # the name, the type and the unparsed remainder of the string.
      comment = '' if groups[0] is None else parse_comment(groups[0])
      arg_name = groups[-3]
      arg_type = groups[-2]
      result[arg_name] = Arg(arg_type, comment)
      # Strip whitespace and comma and parse the next arg.
      remaining_args = groups[-1].lstrip().lstrip(',').lstrip()

    return result

  def _error(self, error: str):
    """Appends |error|, prefixed with the object name and path, to errors."""
    self.errors.append(
        f'Error while parsing documentation for "{self.name}" in {self.path}: '
        f'{error}')


class TableOrView:
  """Parsed documentation of a table or a view."""
  name: str
  type: str
  desc: str
  cols: Dict[str, Arg]

  def __init__(self, name, type, desc, cols):
    self.name = name
    self.type = type
    self.desc = desc
    self.cols = cols


class TableViewDocParser(AbstractDocParser):
  """Parses documentation for CREATE TABLE and CREATE VIEW statements."""

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableOrView]:
    """Parses |doc| into a TableOrView, recording problems in |self.errors|.

    Returns None for internal (underscore-prefixed) objects.
    """
    assert doc.obj_kind == ObjKind.table_view

    or_replace, perfetto_or_virtual, type, self.name, schema = doc.obj_match

    if or_replace is not None:
      self._error(
          f'{type} "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')
    if _is_internal(self.name):
      return None

    is_perfetto_table_or_view = (
        perfetto_or_virtual and perfetto_or_virtual.lower() == 'perfetto')
    if not schema and is_perfetto_table_or_view:
      self._error(
          f'{type} "{self.name}": schema is missing for a non-internal stdlib'
          f' perfetto table or view')

    self._validate_only_contains_annotations(doc.annotations, {'@column'})
    return TableOrView(
        name=self._parse_name(),
        type=type,
        desc=self._parse_desc_not_empty(doc.description),
        cols=self._parse_columns(doc.annotations, schema),
    )


class Function:
  """Parsed documentation of a scalar function."""
  name: str
  desc: str
  args: Dict[str, Arg]
  return_type: str
  return_desc: str

  def __init__(self, name, desc, args, return_type, return_desc):
    self.name = name
    self.desc = desc
    self.args = args
    self.return_type = return_type
    self.return_desc = return_desc


class FunctionDocParser(AbstractDocParser):
  """Parses documentation for CREATE_FUNCTION statements."""

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Function]:
    """Parses |doc| into a Function, recording problems in |self.errors|.

    Returns None for internal (underscore-prefixed) functions.
    """
    or_replace, self.name, args, ret_comment, ret_type = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Function "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal functions.
    if _is_internal(self.name):
      return None

    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be {name.casefold()})')

    ret_desc = None if ret_comment is None else parse_comment(ret_comment)
    if not ret_desc:
      self._error(f'Function "{name}": return description is missing')

    return Function(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        args=self._parse_args(doc.annotations, args),
        return_type=ret_type,
        return_desc=ret_desc,
    )


class TableFunction:
  """Parsed documentation of a table function."""
  name: str
  desc: str
  cols: Dict[str, Arg]
  args: Dict[str, Arg]

  def __init__(self, name, desc, cols, args):
    self.name = name
    self.desc = desc
    self.cols = cols
    self.args = args


class TableFunctionDocParser(AbstractDocParser):
  """Parses documentation for table function statements."""

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableFunction]:
    """Parses |doc| into a TableFunction, recording problems in |self.errors|.

    Returns None for internal (underscore-prefixed) functions.
    """
    # The fourth element of the match (the comment on the return clause) is
    # not used for table functions.
    or_replace, self.name, args, _ret_comment, columns = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Function "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal functions.
    if _is_internal(self.name):
      return None

    self._validate_only_contains_annotations(doc.annotations,
                                             {'@arg', '@column'})
    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    return TableFunction(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        cols=self._parse_columns(doc.annotations, columns),
        args=self._parse_args(doc.annotations, args),
    )


class Macro:
  """Parsed documentation of a macro."""
  name: str
  desc: str
  return_desc: str
  return_type: str
  args: Dict[str, Arg]

  def __init__(self, name: str, desc: str, return_desc: str, return_type: str,
               args: Dict[str, Arg]):
    self.name = name
    self.desc = desc
    self.return_desc = return_desc
    self.return_type = return_type
    self.args = args


class MacroDocParser(AbstractDocParser):
  """Parses documentation for macro statements."""

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Macro]:
    """Parses |doc| into a Macro, recording problems in |self.errors|.

    Returns None for internal (underscore-prefixed) macros.
    """
    or_replace, self.name, args, return_desc, return_type = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Macro "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal macros.
    if _is_internal(self.name):
      return None

    self._validate_only_contains_annotations(doc.annotations, set())
    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Macro name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    # Guard against a missing return comment instead of crashing inside
    # parse_comment, mirroring what FunctionDocParser does.
    # NOTE(review): whether the extractor can actually yield None here is not
    # visible from this file - confirm against the macro pattern.
    parsed_return_desc = ('' if return_desc is None else
                          parse_comment(return_desc))

    return Macro(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        return_desc=parsed_return_desc,
        return_type=return_type,
        args=self._parse_args(doc.annotations, args),
    )


class ParsedFile:
  """Data class containing all of the documentation of single SQL file"""
  # Annotations only: all of these are assigned per instance in __init__, so
  # no (shared, mutable) class-level defaults are needed.
  errors: List[str]
  table_views: List[TableOrView]
  functions: List[Function]
  table_functions: List[TableFunction]
  macros: List[Macro]

  def __init__(self, errors: List[str], table_views: List[TableOrView],
               functions: List[Function], table_functions: List[TableFunction],
               macros: List[Macro]):
    self.errors = errors
    self.table_views = table_views
    self.functions = functions
    self.table_functions = table_functions
    self.macros = macros


def parse_file(path: str, sql: str) -> Optional[ParsedFile]:
  """Extracts and validates the documentation of all objects in |sql|.

  Args:
    path: Path of the SQL file. The module name is taken to be the first path
      component after '/stdlib/'.
    sql: Contents of the SQL file.

  Returns:
    None if the file belongs to the "deprecated" module. Otherwise a
    ParsedFile; if the extraction step reported errors, it contains only
    those errors and no parsed objects.
  """
  if sys.platform.startswith('win'):
    path = path.replace('\\', '/')

  # Get module name
  module_name = path.split('/stdlib/')[-1].split('/')[0]

  # Disable support for `deprecated` module
  if module_name == "deprecated":
    return None

  # Extract all the docs from the SQL.
  extractor = DocsExtractor(path, module_name, sql)
  docs = extractor.extract()
  if extractor.errors:
    return ParsedFile(extractor.errors, [], [], [], [])

  # Parse the extracted docs.
  errors = []
  table_views = []
  functions = []
  table_functions = []
  macros = []
  for doc in docs:
    # Pick the parser for this kind of object together with the list which
    # collects its results; objects of any other kind are skipped.
    if doc.obj_kind == ObjKind.table_view:
      parser, parsed_objs = TableViewDocParser(path, module_name), table_views
    elif doc.obj_kind == ObjKind.function:
      parser, parsed_objs = FunctionDocParser(path, module_name), functions
    elif doc.obj_kind == ObjKind.table_function:
      parser = TableFunctionDocParser(path, module_name)
      parsed_objs = table_functions
    elif doc.obj_kind == ObjKind.macro:
      parser, parsed_objs = MacroDocParser(path, module_name), macros
    else:
      continue

    res = parser.parse(doc)
    if res:
      parsed_objs.append(res)
    errors += parser.errors

  return ParsedFile(errors, table_views, functions, table_functions, macros)