• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright (C) 2022 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16from abc import ABC
17from dataclasses import dataclass
18import re
19import sys
20from typing import Dict, List, Optional, Set, NamedTuple
21
22from python.generators.sql_processing.docs_extractor import DocsExtractor
23from python.generators.sql_processing.utils import ObjKind
24
25from python.generators.sql_processing.utils import ALLOWED_PREFIXES
26from python.generators.sql_processing.utils import OBJECT_NAME_ALLOWLIST
27
28from python.generators.sql_processing.utils import COLUMN_ANNOTATION_PATTERN
29from python.generators.sql_processing.utils import ANY_PATTERN
30from python.generators.sql_processing.utils import ARG_DEFINITION_PATTERN
31from python.generators.sql_processing.utils import ARG_ANNOTATION_PATTERN
32
33
34def _is_internal(name: str) -> bool:
35  return re.match(r'^_.*', name, re.IGNORECASE) is not None
36
37
38def _is_snake_case(s: str) -> bool:
39  return re.fullmatch(r'^[a-z_0-9]*$', s) is not None
40
41
def parse_comment(comment: str) -> str:
  """Joins a multi-line SQL comment into a single line of text.

  Every line is stripped of surrounding whitespace, of one leading "--"
  comment marker (if present) and of the whitespace following the marker. The
  resulting lines are joined with single spaces, so the two lines "-- Foo" and
  " -- bar." become "Foo bar.".
  """
  lines = []
  for line in comment.strip().split('\n'):
    line = line.strip()
    # Remove only the comment marker itself. str.lstrip('--') treats its
    # argument as a set of characters and would also eat dashes that belong to
    # the comment text (e.g. "---1" would lose its minus sign).
    if line.startswith('--'):
      line = line[2:]
    lines.append(line.lstrip())
  return ' '.join(lines)
46
47
def get_module_prefix_error(name: str, path: str, module: str) -> Optional[str]:
  """Checks that |name| carries a prefix permitted for |module| and |path|.

  The prefix is the lower-cased part of |name| before the first underscore.

  Args:
    name: Name of the table/view/function being checked.
    path: Path of the file defining the object; matched against the keys of
      ALLOWED_PREFIXES (as a path prefix) and OBJECT_NAME_ALLOWLIST (exactly).
    module: Name of the module the file belongs to.

  Returns:
    An error message if the name is not correctly prefixed, None otherwise.
  """
  prefix = name.lower().split('_')[0]
  if module in ["common", "prelude", "deprecated"]:
    # In these modules the module name must NOT be used as a prefix.
    if prefix == module:
      return (f'Names of tables/views/functions in the "{module}" module '
              f'should not start with {module}')
    return None
  if prefix == module:
    # Module prefix is always allowed.
    return None
  # Explicitly allowlisted names are exempt from the prefix rule. The check
  # does not depend on ALLOWED_PREFIXES, so it is done once, outside the loop
  # below, and therefore also applies when ALLOWED_PREFIXES is empty.
  if path in OBJECT_NAME_ALLOWLIST and name in OBJECT_NAME_ALLOWLIST[path]:
    return None
  allowed_prefixes = [module]
  for (path_prefix, allowed_name_prefix) in ALLOWED_PREFIXES.items():
    if path.startswith(path_prefix):
      if prefix == allowed_name_prefix:
        return None
      # Collected only to list the alternatives in the error message.
      allowed_prefixes.append(allowed_name_prefix)
  return (
      f'Names of tables/views/functions at path "{path}" should be prefixed '
      f'with one of following names: {", ".join(allowed_prefixes)}')
70
71
class Arg(NamedTuple):
  """Type and description of a single documented argument or column."""
  # TODO(b/307926059): the type is missing on old-style documentation for
  # tables. Make it "str" after stdlib is migrated.
  type: Optional[str]
  description: str
77
78
class AbstractDocParser(ABC):
  """Base class with validation helpers shared by the documentation parsers.

  Documentation problems are recorded as messages in self.errors (through
  _error()) rather than raised. Subclasses are expected to assign self.name
  before calling the helpers that read it.
  """

  @dataclass
  class Column:
    pass

  def __init__(self, path: str, module: str):
    self.path = path
    self.module = module
    # Name of the object currently being parsed; None until a subclass sets it.
    self.name = None
    # Messages accumulated by _error().
    self.errors = []

  def _parse_name(self, upper: bool = False):
    """Validates the prefix of self.name and returns the name stripped.

    A prefix violation is recorded as an error; the name is returned either
    way. |upper| is currently unused.
    """
    assert self.name
    assert isinstance(self.name, str)
    module_prefix_error = get_module_prefix_error(self.name, self.path,
                                                  self.module)
    if module_prefix_error is not None:
      self._error(module_prefix_error)
    return self.name.strip()

  def _parse_desc_not_empty(self, desc: str):
    """Returns |desc| stripped, recording an error if nothing is left."""
    # Normalise first so that None and whitespace-only descriptions are
    # reported as missing instead of crashing or slipping through.
    stripped = (desc or '').strip()
    if not stripped:
      self._error('Description of the table/view/function/macro is missing')
    return stripped

  def _validate_only_contains_annotations(self,
                                          ans: List[DocsExtractor.Annotation],
                                          ans_types: Set[str]):
    """Records an error for every annotation key in |ans| not in |ans_types|."""
    used_ans_types = {a.key for a in ans}
    # Sorted so that the order of the reported errors is deterministic.
    for unknown_key in sorted(used_ans_types.difference(ans_types)):
      self._error(f'Unknown documentation annotation {unknown_key}')

  def _parse_columns(self, ans: List[DocsExtractor.Annotation],
                     schema: Optional[str]) -> Dict[str, Arg]:
    """Builds the column documentation from @column annotations and |schema|.

    Without a schema the @column annotations are returned as they are (with
    no type). With a schema the columns come from the schema definition, and a
    column lacking an inline comment takes its description from the matching
    @column annotation.

    Errors are recorded for malformed annotations, undocumented columns,
    columns documented both inline and by annotation, and annotations naming
    a column that is not part of the schema.
    """
    column_annotations = {}
    for an in ans:
      if an.key != '@column':
        continue
      m = re.match(COLUMN_ANNOTATION_PATTERN, an.value)
      if not m:
        self._error(f'@column annotation value {an.value} does not match '
                    f'pattern {COLUMN_ANNOTATION_PATTERN}')
        continue
      column_annotations[m.group(1)] = Arg(None, m.group(2).strip())

    if not schema:
      # If we don't have schema, we have to accept annotations as the source of
      # truth.
      return column_annotations

    columns = self._parse_args_definition(schema)

    for column in columns:
      inline_comment = columns[column].description
      if not inline_comment and column not in column_annotations:
        self._error(f'Column "{column}" is missing a description. Please add a '
                    'comment in front of the column definition')
        continue

      if column not in column_annotations:
        continue
      annotation = column_annotations[column].description
      if inline_comment and annotation:
        self._error(f'Column "{column}" is documented twice. Please remove the '
                    '@column annotation')
      if not inline_comment and annotation:
        # Absorb old-style annotations.
        columns[column] = Arg(columns[column].type, annotation)

    # Check that the annotations match existing columns.
    for annotation in column_annotations:
      if annotation not in columns:
        self._error(f'Column "{annotation}" is documented but does not exist '
                    'in table definition')
    return columns

  def _parse_args(self, ans: List[DocsExtractor.Annotation],
                  sql_args_str: str) -> Dict[str, Arg]:
    """Builds the argument documentation from |sql_args_str| and @arg annotations.

    Arguments come from the SQL definition; an argument lacking an inline
    comment takes its description from the matching @arg annotation.

    Errors are recorded for malformed annotations, undocumented arguments,
    arguments documented both inline and by annotation, and annotations naming
    an argument that is not part of the definition.
    """
    args = self._parse_args_definition(sql_args_str)

    arg_annotations = {}
    for an in ans:
      if an.key != '@arg':
        continue
      m = re.match(ARG_ANNOTATION_PATTERN, an.value)
      if m is None:
        self._error(f'Expected arg documentation "{an.value}" to match pattern '
                    f'{ARG_ANNOTATION_PATTERN}')
        continue
      arg_annotations[m.group(1)] = Arg(m.group(2), m.group(3).strip())

    for arg in args:
      if not args[arg].description and arg not in arg_annotations:
        self._error(f'Arg "{arg}" is missing a description. '
                    'Please add a comment in front of the arg definition.')
      if args[arg].description and arg in arg_annotations:
        self._error(f'Arg "{arg}" is documented twice. '
                    'Please remove the @arg annotation')
      if not args[arg].description and arg in arg_annotations:
        # Absorb old-style annotations.
        # TODO(b/307926059): Remove it once stdlib is migrated.
        args[arg] = Arg(args[arg].type, arg_annotations[arg].description)

    for arg in arg_annotations:
      if arg not in args:
        self._error(
            f'Arg "{arg}" is documented but not found in function definition.')
    return args

  # Parse function argument definition list or a table schema, e.g.
  # arg1 INT, arg2 STRING, including their comments.
  def _parse_args_definition(self, args_str: str) -> Dict[str, Arg]:
    """Parses |args_str| into a mapping from name to Arg(type, comment).

    Definitions are consumed one at a time from the front of the string. If
    the remaining text does not match, an error is recorded and the entries
    parsed so far are returned.
    """
    result = {}
    remaining_args = args_str.strip()
    while remaining_args:
      m = re.match(fr'^{ARG_DEFINITION_PATTERN}({ANY_PATTERN})', remaining_args)
      if not m:
        # The last fragment must be an f-string too, otherwise the message
        # shows the placeholder text instead of the pattern.
        self._error(f'Expected "{args_str}" to correspond to '
                    '"-- Comment\n arg_name TYPE" format '
                    f'({ARG_DEFINITION_PATTERN})')
        return result
      groups = m.groups()
      # The first group is the (optional) comment; the last three groups are
      # the name, the type and the not yet parsed rest of the string.
      comment = '' if groups[0] is None else parse_comment(groups[0])
      name = groups[-3]
      arg_type = groups[-2]
      result[name] = Arg(arg_type, comment)
      # Strip whitespace and comma and parse the next arg.
      remaining_args = groups[-1].lstrip().lstrip(',').lstrip()

    return result

  def _error(self, error: str):
    """Appends |error| to self.errors, prefixed with the object name and path."""
    self.errors.append(
        f'Error while parsing documentation for "{self.name}" in {self.path}: '
        f'{error}')
215
216
class TableOrView:
  """Parsed documentation of a single table or view."""
  # Name of the table or view.
  name: str
  # Kind of the object, as matched in the CREATE statement.
  type: str
  # Description of the object.
  desc: str
  # Column name -> type and description of the column.
  cols: Dict[str, Arg]

  def __init__(self, name, type, desc, cols):
    self.name, self.type = name, type
    self.desc, self.cols = desc, cols
228
229
class TableViewDocParser(AbstractDocParser):
  """Parses documentation for CREATE TABLE and CREATE VIEW statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableOrView]:
    """Validates |doc| and turns it into a TableOrView.

    Returns None for internal (underscore-prefixed) objects. Validation
    problems are recorded in self.errors.
    """
    assert doc.obj_kind == ObjKind.table_view

    or_replace, perfetto_or_virtual, kind, self.name, schema = doc.obj_match

    if or_replace is not None:
      self._error(
          f'{kind} "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Internal objects are not documented.
    if _is_internal(self.name):
      return None

    # Only perfetto tables/views are required to declare a schema.
    if not schema:
      if perfetto_or_virtual and perfetto_or_virtual.lower() == 'perfetto':
        self._error(
            f'{kind} "{self.name}": schema is missing for a non-internal stdlib'
            f' perfetto table or view')

    self._validate_only_contains_annotations(doc.annotations, {'@column'})

    # Parsed in this order so that errors are reported in a stable sequence.
    name = self._parse_name()
    desc = self._parse_desc_not_empty(doc.description)
    cols = self._parse_columns(doc.annotations, schema)
    return TableOrView(name=name, type=kind, desc=desc, cols=cols)
263
264
class Function:
  """Parsed documentation of a single scalar function."""
  # Name of the function.
  name: str
  # Description of the function.
  desc: str
  # Argument name -> type and description of the argument.
  args: Dict[str, Arg]
  # Type of the returned value.
  return_type: str
  # Description of the returned value.
  return_desc: str

  def __init__(self, name, desc, args, return_type, return_desc):
    self.name, self.desc, self.args = name, desc, args
    self.return_type, self.return_desc = return_type, return_desc
278
279
class FunctionDocParser(AbstractDocParser):
  """Parses documentation for CREATE_FUNCTION statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Function]:
    """Validates |doc| and turns it into a Function.

    Returns None for internal (underscore-prefixed) functions. Validation
    problems are recorded in self.errors.
    """
    or_replace, self.name, args, ret_comment, ret_type = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Function "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal functions.
    if _is_internal(self.name):
      return None

    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be {name.casefold()})')

    # A missing return comment and an empty one are reported the same way.
    ret_desc = parse_comment(ret_comment) if ret_comment is not None else None
    if not ret_desc:
      self._error(f'Function "{name}": return description is missing')

    # Parsed in this order so that errors are reported in a stable sequence.
    desc = self._parse_desc_not_empty(doc.description)
    parsed_args = self._parse_args(doc.annotations, args)
    return Function(
        name=name,
        desc=desc,
        args=parsed_args,
        return_type=ret_type,
        return_desc=ret_desc,
    )
316
317
class TableFunction:
  """Parsed documentation of a single table function."""
  # Name of the table function.
  name: str
  # Description of the table function.
  desc: str
  # Column name -> type and description of the returned column.
  cols: Dict[str, Arg]
  # Argument name -> type and description of the argument.
  args: Dict[str, Arg]

  def __init__(self, name, desc, cols, args):
    self.name, self.desc = name, desc
    self.cols, self.args = cols, args
329
330
class TableFunctionDocParser(AbstractDocParser):
  """Parses documentation for table function statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableFunction]:
    """Validates |doc| and turns it into a TableFunction.

    Returns None for internal (underscore-prefixed) functions. Validation
    problems are recorded in self.errors.
    """
    # The fourth element of the match (return comment) is not used here.
    or_replace, self.name, args, _ret_comment, columns = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Function "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal functions.
    if _is_internal(self.name):
      return None

    allowed_annotations = {'@arg', '@column'}
    self._validate_only_contains_annotations(doc.annotations,
                                             allowed_annotations)
    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    # Parsed in this order so that errors are reported in a stable sequence.
    desc = self._parse_desc_not_empty(doc.description)
    cols = self._parse_columns(doc.annotations, columns)
    parsed_args = self._parse_args(doc.annotations, args)
    return TableFunction(name=name, desc=desc, cols=cols, args=parsed_args)
364
365
class Macro:
  """Parsed documentation of a single macro."""
  # Name of the macro.
  name: str
  # Description of the macro.
  desc: str
  # Description of what the macro returns.
  return_desc: str
  # Type of what the macro returns.
  return_type: str
  # Argument name -> type and description of the argument.
  args: Dict[str, Arg]

  def __init__(self, name: str, desc: str, return_desc: str, return_type: str,
               args: Dict[str, Arg]):
    self.name, self.desc = name, desc
    self.return_desc, self.return_type = return_desc, return_type
    self.args = args
380
381
class MacroDocParser(AbstractDocParser):
  """Parses documentation for macro statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Macro]:
    """Validates |doc| and turns it into a Macro.

    Returns None for internal (underscore-prefixed) macros. Validation
    problems are recorded in self.errors.
    """
    or_replace, self.name, args, return_desc, return_type = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Macro "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal macros.
    if _is_internal(self.name):
      return None

    # Macros do not accept any documentation annotations.
    self._validate_only_contains_annotations(doc.annotations, set())
    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Macro name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    return Macro(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        # A missing return comment yields an empty description instead of
        # crashing inside parse_comment().
        return_desc='' if return_desc is None else parse_comment(return_desc),
        return_type=return_type,
        args=self._parse_args(doc.annotations, args),
    )
415
416
class ParsedFile:
  """Data class containing all of the documentation of a single SQL file."""
  # The attributes are declared without class-level defaults: a default such as
  # `[]` would be one list object shared by the class and all of its instances.
  # Every attribute is assigned per instance in __init__.
  errors: List[str]
  table_views: List[TableOrView]
  functions: List[Function]
  table_functions: List[TableFunction]
  macros: List[Macro]

  def __init__(self, errors: List[str], table_views: List[TableOrView],
               functions: List[Function], table_functions: List[TableFunction],
               macros: List[Macro]):
    self.errors = errors
    self.table_views = table_views
    self.functions = functions
    self.table_functions = table_functions
    self.macros = macros
433
434
def parse_file(path: str, sql: str) -> Optional[ParsedFile]:
  """Extracts and validates the documentation contained in one SQL file.

  Args:
    path: Path of the SQL file. The module name is taken from the first path
      component following the last '/stdlib/'.
    sql: Contents of the file.

  Returns:
    None for files of the "deprecated" module. Otherwise a ParsedFile; if the
    extraction itself reports errors, it contains only those errors.
  """
  if sys.platform.startswith('win'):
    path = path.replace('\\', '/')

  # Get module name
  after_stdlib = path.split('/stdlib/')[-1]
  module_name = after_stdlib.split('/')[0]

  # Disable support for `deprecated` module
  if module_name == "deprecated":
    return None

  # Extract all the docs from the SQL.
  extractor = DocsExtractor(path, module_name, sql)
  docs = extractor.extract()
  if extractor.errors:
    return ParsedFile(extractor.errors, [], [], [], [])

  errors = []
  table_views = []
  functions = []
  table_functions = []
  macros = []

  # Object kind -> parser class to use and list collecting its results.
  handlers = (
      (ObjKind.table_view, TableViewDocParser, table_views),
      (ObjKind.function, FunctionDocParser, functions),
      (ObjKind.table_function, TableFunctionDocParser, table_functions),
      (ObjKind.macro, MacroDocParser, macros),
  )

  # Parse the extracted docs.
  for doc in docs:
    for kind, parser_cls, parsed in handlers:
      if doc.obj_kind == kind:
        parser = parser_cls(path, module_name)
        res = parser.parse(doc)
        if res:
          parsed.append(res)
        errors.extend(parser.errors)

  return ParsedFile(errors, table_views, functions, table_functions, macros)
487