• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright (C) 2022 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16from abc import ABC
17from dataclasses import dataclass
18import re
19import sys
20from typing import Dict, List, Optional, Set, NamedTuple
21
22from python.generators.sql_processing.docs_extractor import DocsExtractor
23from python.generators.sql_processing.utils import ObjKind
24from python.generators.sql_processing.utils import COLUMN_TYPES, MACRO_ARG_TYPES
25
26from python.generators.sql_processing.utils import ALLOWED_PREFIXES
27from python.generators.sql_processing.utils import OBJECT_NAME_ALLOWLIST
28
29from python.generators.sql_processing.utils import ANY_PATTERN
30from python.generators.sql_processing.utils import ARG_DEFINITION_PATTERN
31
32
33def _is_internal(name: str) -> bool:
34  return re.match(r'^_.*', name, re.IGNORECASE) is not None
35
36
37def _is_snake_case(s: str) -> bool:
38  return re.fullmatch(r'^[a-z_0-9]*$', s) is not None
39
40
def parse_comment(comment: str) -> str:
  r"""
  Parse a SQL comment (i.e. -- Foo\n -- bar.) into a string (i.e. "Foo bar.").

  Each line is stripped of surrounding whitespace, has a single leading `--`
  comment marker removed (if present) and is then joined to the other lines
  with a single space.
  """

  def strip_marker(line: str) -> str:
    line = line.strip()
    # Remove exactly one "--" marker. str.lstrip('--') would treat its
    # argument as a character set and also eat dashes that belong to the
    # comment text itself (e.g. "---1" would become "1").
    if line.startswith('--'):
      line = line[2:]
    return line.lstrip()

  return ' '.join(strip_marker(line) for line in comment.strip().split('\n'))
47
48
def get_module_prefix_error(name: str, path: str, module: str) -> Optional[str]:
  """Returns error message if the name is not correct, None otherwise.

  Rules, in the order they are checked:
    * In the "common", "prelude" and "deprecated" modules a name must NOT
      start with the module name.
    * In any other module a name starting with the module name is accepted.
    * A name listed in OBJECT_NAME_ALLOWLIST for this exact `path` is
      accepted.
    * A name starting with one of the prefixes that ALLOWED_PREFIXES grants
      to a path prefix of `path` is accepted.
  Anything else yields an error listing the prefixes that would be accepted.
  """
  if module in ("common", "prelude", "deprecated"):
    if name.startswith(module):
      return (f'Names of tables/views/functions in the "{module}" module '
              f'should not start with {module}')
    return None
  if name.startswith(module):
    # Module prefix is always allowed.
    return None
  # The allowlist does not depend on ALLOWED_PREFIXES, so check it once up
  # front; checking it inside the loop below would skip it entirely whenever
  # ALLOWED_PREFIXES has no entries.
  if path in OBJECT_NAME_ALLOWLIST and name in OBJECT_NAME_ALLOWLIST[path]:
    return None
  allowed_prefixes = [module]
  for path_prefix, allowed_name_prefixes in ALLOWED_PREFIXES.items():
    if not path.startswith(path_prefix):
      continue
    if any(name.startswith(prefix) for prefix in allowed_name_prefixes):
      return None
    # Remember the prefixes so that the error message can suggest them.
    allowed_prefixes.extend(allowed_name_prefixes)
  return (
      f'Names of tables/views/functions at path "{path}" should be prefixed '
      f'with one of following names: {", ".join(allowed_prefixes)}')
71
72
class Arg(NamedTuple):
  """One documented argument or column of a Perfetto SQL object."""
  # Short type name: 'JOINID' or 'ID' when the declared type has the form
  # JOINID(...) / ID(...), otherwise the declared type itself.
  type: str
  # The type exactly as it was written in the SQL definition.
  long_type: str
  # Text of the SQL comment preceding the definition ('' if there is none).
  description: str
77
78
class AbstractDocParser(ABC):
  """Shared state and validation helpers for the documentation parsers.

  Validation problems are never raised: they are formatted by `_error` and
  appended to `self.errors`, which the caller inspects after parsing.
  """

  @dataclass
  class Column:
    pass

  def __init__(self, path: str, module: str):
    self.path = path
    self.module = module
    # Name of the object being parsed; `_parse_name` asserts that it was set.
    self.name: Optional[str] = None
    self.errors: List[str] = []

  def _parse_name(self, upper: bool = False):
    """Validates the prefix of `self.name` and returns the stripped name.

    `upper` is accepted for backwards compatibility and is not used.
    """
    assert self.name
    assert isinstance(self.name, str)
    module_prefix_error = get_module_prefix_error(self.name, self.path,
                                                  self.module)
    if module_prefix_error is not None:
      self._error(module_prefix_error)
    return self.name.strip()

  def _parse_desc_not_empty(self, desc: str):
    """Records an error if `desc` is empty and returns `desc` stripped."""
    if not desc:
      self._error('Description of the table/view/function/macro is missing')
    return desc.strip()

  def _parse_columns(self, schema: str, kind: ObjKind) -> Dict[str, Arg]:
    """Parses a column schema and validates each column.

    An empty/missing schema yields no columns. A column without a description
    is reported and its type is not checked any further.
    """
    columns = self._parse_args_definition(schema) if schema else {}
    for column_name, properties in columns.items():
      if not properties.description:
        self._error(f'Column "{column_name}" is missing a description. Please add a '
                    'comment in front of the column definition')
        continue

      upper_arg_type = properties.type.upper()
      if kind is ObjKind.table_function:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Table function column "{column_name}" has unsupported type "{properties.type}".')
      elif kind is ObjKind.table_view:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Table/view column "{column_name}" has unsupported type "{properties.type}".')
      else:
        self._error("This Perfetto SQL object doesn't support columns.")

    return columns

  def _parse_args(self, sql_args_str: str, kind: ObjKind) -> Dict[str, Arg]:
    """Parses an argument list and validates each argument.

    Unlike columns, an argument without a description still has its type
    checked.
    """
    args = self._parse_args_definition(sql_args_str)

    for arg_name, properties in args.items():
      if not properties.description:
        self._error(f'Arg "{arg_name}" is missing a description. '
                    'Please add a comment in front of the arg definition.')

      upper_arg_type = properties.type.upper()
      if kind is ObjKind.function or kind is ObjKind.table_function:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Function arg "{arg_name}" has unsupported type "{properties.type}".')
      elif kind is ObjKind.macro:
        if upper_arg_type not in MACRO_ARG_TYPES:
          self._error(
              f'Macro arg "{arg_name}" has unsupported type "{properties.type}".')
      else:
        self._error("This Perfetto SQL object doesn't support types.")

    return args

  # Parse function argument definition list or a table schema, e.g.
  # arg1 INT, arg2 STRING, including their comments.
  def _parse_args_definition(self, args_str: str) -> Dict[str, Arg]:
    """Splits `args_str` into a name -> Arg mapping.

    Parsing stops at the first definition that does not match the expected
    format; an error is recorded and the definitions parsed so far are
    returned.
    """
    result = {}
    # One definition followed by "everything else"; built once per call rather
    # than once per parsed definition.
    definition_re = re.compile(fr'^{ARG_DEFINITION_PATTERN}({ANY_PATTERN})')
    remaining_args = args_str.strip()
    while remaining_args:
      m = definition_re.match(remaining_args)
      if not m:
        # The last fragment must be an f-string too, otherwise the literal
        # text "{ARG_DEFINITION_PATTERN}" ends up in the message.
        self._error(f'Expected "{args_str}" to correspond to '
                    '"-- Comment\n arg_name TYPE" format '
                    f'({ARG_DEFINITION_PATTERN})')
        return result
      groups = m.groups()
      comment = '' if groups[0] is None else parse_comment(groups[0])
      # The last three groups are the name, the declared type and the
      # unparsed remainder of the string.
      name, long_type, rest = groups[-3], groups[-2], groups[-1]

      # JOINID(table.column) / ID(table.column) are reported under their short
      # name; every other type is used as written.
      if re.match(r'JOINID\(([_A-Za-z\.]*)\)', long_type):
        short_type = 'JOINID'
      elif re.match(r'ID\(([_A-Za-z\.]*)\)', long_type):
        short_type = 'ID'
      else:
        short_type = long_type

      result[name] = Arg(short_type, long_type, comment)
      # Strip whitespace and comma and parse the next arg.
      remaining_args = rest.lstrip().lstrip(',').lstrip()

    return result

  def _error(self, error: str):
    """Appends `error`, prefixed with the object name and file path."""
    self.errors.append(
        f'Error while parsing documentation for "{self.name}" in {self.path}: '
        f'{error}')
188
189
class TableOrView:
  """Documentation record for a single table or view."""
  name: str
  type: str
  desc: str
  cols: Dict[str, Arg]

  def __init__(self, name, type, desc, cols):
    # Plain value holder: the arguments are stored unchanged.
    self.name, self.type, self.desc, self.cols = name, type, desc, cols
201
202
class TableViewDocParser(AbstractDocParser):
  """Parses documentation for CREATE TABLE and CREATE VIEW statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableOrView]:
    """Validates `doc` and converts it into a TableOrView.

    Returns None for internal (underscore-prefixed) objects and whenever a
    blocking problem is found; in the latter case the problem is recorded in
    `self.errors`.
    """
    assert doc.obj_kind == ObjKind.table_view

    or_replace, perfetto_or_virtual, obj_type, self.name, schema = doc.obj_match

    if or_replace is not None:
      self._error(
          f'{obj_type} "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')
      return None

    if _is_internal(self.name):
      return None

    # "window" is the one object that may be exposed without a schema and as
    # a virtual table.
    is_window = self.name.lower() == "window"

    if not schema and not is_window:
      self._error(
          f'{obj_type} "{self.name}": schema is missing for a non-internal stdlib'
          f' perfetto table or view')
      return None

    is_table = obj_type.lower() == "table"

    if is_table and not perfetto_or_virtual:
      self._error(
          f'{obj_type} "{self.name}": Can only expose CREATE PERFETTO tables')
      return None

    is_virtual_table = is_table and perfetto_or_virtual.lower() == "virtual"
    if is_virtual_table and not is_window:
      self._error(f'{obj_type} "{self.name}": Virtual tables cannot be exposed.')
      return None

    # Parse the columns exactly once: every call appends its validation
    # errors, so parsing twice would report each column error twice.
    cols = self._parse_columns(schema, ObjKind.table_view)

    return TableOrView(
        name=self._parse_name(),
        type=obj_type,
        desc=self._parse_desc_not_empty(doc.description),
        cols=cols)
249
250
class Function:
  """Documentation record for a single scalar function."""
  name: str
  desc: str
  args: Dict[str, Arg]
  return_type: str
  return_desc: str

  def __init__(self, name, desc, args, return_type, return_desc):
    # Plain value holder: the arguments are stored unchanged.
    self.name, self.desc, self.args = name, desc, args
    self.return_type, self.return_desc = return_type, return_desc
264
265
class FunctionDocParser(AbstractDocParser):
  """Parses documentation for CREATE_FUNCTION statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Function]:
    """Validates `doc` and converts it into a Function.

    Internal (underscore-prefixed) functions yield None. Validation problems
    are recorded in `self.errors` and do not stop the conversion.
    """
    or_replace, self.name, raw_args, ret_comment, ret_type = doc.obj_match

    uses_or_replace = or_replace is not None
    if uses_or_replace:
      self._error(
          f'Function "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal functions.
    if _is_internal(self.name):
      return None

    name = self._parse_name()
    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be {name.casefold()})')

    if ret_comment is None:
      ret_desc = None
    else:
      ret_desc = parse_comment(ret_comment)
    if not ret_desc:
      self._error(f'Function "{name}": return description is missing')

    # Keep this order (description first, then args): both helpers may append
    # to `self.errors`.
    description = self._parse_desc_not_empty(doc.description)
    parsed_args = self._parse_args(raw_args, ObjKind.function)

    return Function(name, description, parsed_args, ret_type, ret_desc)
302
303
class TableFunction:
  """Documentation record for a single table function."""
  name: str
  desc: str
  cols: Dict[str, Arg]
  args: Dict[str, Arg]

  def __init__(self, name, desc, cols, args):
    # Plain value holder: the arguments are stored unchanged.
    self.name, self.desc, self.cols, self.args = name, desc, cols, args
315
316
class TableFunctionDocParser(AbstractDocParser):
  """Parses documentation for table function statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableFunction]:
    """Validates `doc` and converts it into a TableFunction.

    Yields None for CREATE OR REPLACE statements (after recording an error)
    and for internal (underscore-prefixed) functions.
    """
    # The return comment is part of the match but is not used here.
    or_replace, self.name, raw_args, _ret_comment, raw_columns = doc.obj_match

    uses_or_replace = or_replace is not None
    if uses_or_replace:
      self._error(
          f'Function "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')
      return None

    # Ignore internal functions.
    if _is_internal(self.name):
      return None

    name = self._parse_name()
    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    # Keep this order (description, columns, args): each helper may append to
    # `self.errors`.
    description = self._parse_desc_not_empty(doc.description)
    columns = self._parse_columns(raw_columns, ObjKind.table_function)
    arguments = self._parse_args(raw_args, ObjKind.table_function)

    return TableFunction(name, description, columns, arguments)
349
350
class Macro:
  """Documentation record for a single macro."""
  name: str
  desc: str
  return_desc: str
  return_type: str
  args: Dict[str, Arg]

  def __init__(self, name: str, desc: str, return_desc: str, return_type: str,
               args: Dict[str, Arg]):
    # Plain value holder: the arguments are stored unchanged.
    self.name, self.desc = name, desc
    self.return_desc, self.return_type = return_desc, return_type
    self.args = args
365
366
class MacroDocParser(AbstractDocParser):
  """Parses documentation for macro statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Macro]:
    """Validates `doc` and converts it into a Macro.

    Internal (underscore-prefixed) macros yield None. Validation problems are
    recorded in `self.errors` and do not stop the conversion.
    """
    or_replace, self.name, args, return_desc, return_type = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Macro "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal macros.
    if _is_internal(self.name):
      return None

    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Macro name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    # A missing return comment is treated as an empty description rather than
    # being passed to parse_comment, which would fail on None.
    parsed_return_desc = ('' if return_desc is None else
                          parse_comment(return_desc))

    return Macro(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        return_desc=parsed_return_desc,
        return_type=return_type,
        args=self._parse_args(args, ObjKind.macro),
    )
399
400
class Include:
  """Record of one module included by a SQL file."""
  package: str
  module: str
  module_as_list: List[str]

  def __init__(self, package: str, module: str, module_as_list: List[str]):
    # Plain value holder: the arguments are stored unchanged.
    self.package, self.module = package, module
    self.module_as_list = module_as_list
410
411
class IncludeParser(AbstractDocParser):
  """Parses the includes of module."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Include]:
    """Builds an Include from the first element of `doc.obj_match`.

    The dotted module name replaces `self.module`; its first component is
    used as the package.
    """
    included_module = list(doc.obj_match)[0]
    self.module = included_module
    components = included_module.split('.')
    package = components[0]

    return Include(package, included_module, components)
427
428
class ParsedModule:
  """Data class containing all of the documentation of single SQL file"""
  # NOTE: the class-level defaults below are never observed on instances,
  # because __init__ always assigns every attribute.
  package_name: str = ""
  module_as_list: List[str]
  module: str
  errors: List[str] = []
  table_views: List[TableOrView] = []
  functions: List[Function] = []
  table_functions: List[TableFunction] = []
  macros: List[Macro] = []
  includes: List[Include]

  def __init__(self, package_name: str, module_as_list: List[str],
               errors: List[str], table_views: List[TableOrView],
               functions: List[Function], table_functions: List[TableFunction],
               macros: List[Macro], includes: List[Include]):
    self.package_name, self.module_as_list = package_name, module_as_list
    # Dotted module name derived from the path components.
    self.module = ".".join(module_as_list)
    self.errors, self.table_views = errors, table_views
    self.functions, self.table_functions = functions, table_functions
    self.macros, self.includes = macros, includes
454
455
def parse_file(path: str, sql: str) -> Optional[ParsedModule]:
  """Parses the documentation found in the SQL of a single stdlib file.

  The module is derived from the part of `path` after '/stdlib/' (without the
  '.sql' suffix); its first component is the package name. Returns None for
  files of the `deprecated` package. If extracting the docs reports errors,
  the returned ParsedModule carries only those errors; otherwise it carries
  every parsed object together with the validation errors of all parsers.
  """
  if sys.platform.startswith('win'):
    path = path.replace('\\', '/')

  path_in_stdlib = path.split('/stdlib/')[-1]
  module_as_list: List[str] = path_in_stdlib.split(".sql")[0].split('/')

  # Get package name
  package_name = module_as_list[0]

  # Disable support for `deprecated` package
  if package_name == "deprecated":
    return None

  # Extract all the docs from the SQL.
  extractor = DocsExtractor(path, package_name, sql)
  docs = extractor.extract()
  if extractor.errors:
    return ParsedModule(package_name, module_as_list, extractor.errors, [], [],
                        [], [], [])

  # Parse the extracted docs.
  errors: List[str] = []
  table_views: List[TableOrView] = []
  functions: List[Function] = []
  table_functions: List[TableFunction] = []
  macros: List[Macro] = []
  includes: List[Include] = []

  # (object kind, parser class, list collecting that parser's results)
  handlers = (
      (ObjKind.table_view, TableViewDocParser, table_views),
      (ObjKind.function, FunctionDocParser, functions),
      (ObjKind.table_function, TableFunctionDocParser, table_functions),
      (ObjKind.macro, MacroDocParser, macros),
      (ObjKind.include, IncludeParser, includes),
  )
  for doc in docs:
    for kind, parser_cls, collected in handlers:
      if doc.obj_kind == kind:
        # A fresh parser per doc, so `parser.errors` only holds this doc's
        # errors.
        parser = parser_cls(path, package_name)
        res = parser.parse(doc)
        if res:
          collected.append(res)
        errors += parser.errors

  return ParsedModule(package_name, module_as_list, errors, table_views,
                      functions, table_functions, macros, includes)
520