• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# coding=utf-8
3#
4# Copyright (c) 2024-2025 Huawei Device Co., Ltd.
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17
18"""Module provides custom text tools for parsing."""
19
20from typing import Tuple, Dict
21from log_tools import debug_log
22
23
# Default value for the `pos_end` limit of the search helpers in this module.
# Every helper combines it with len(data) via min(), so for inputs shorter
# than this value it simply means "search up to the end of the data".
MAX_LEN = 10_000_000
25
26
def find_first_not_restricted_character(restricted: str, data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """Return the index of the first character of `data` that is not in `restricted`.

    The scan covers indices from `pos` up to (but not including)
    min(len(data), pos_end). Returns len(data) when every scanned
    character belongs to `restricted` or the range is empty.
    """
    scan_stop = min(len(data), pos_end)
    allowed_positions = (idx for idx in range(pos, scan_stop) if data[idx] not in restricted)
    return next(allowed_positions, len(data))
32
33
def rfind_first_not_restricted_character(restricted: str, data: str, pos: int, pos_end: int = 0) -> int:
    """Search backwards for the last character that is not in `restricted`.

    The scan starts at `pos` and walks down to max(0, pos_end); `pos_end`
    itself is included in the search. A `pos` at or beyond the end of
    `data` is clamped to the last valid index.

    Returns the index of the character found, or len(data) when no such
    character exists in the scanned range.
    """
    # `>=` (not `>`): pos == len(data) is already out of range and would
    # otherwise raise IndexError on the first data[pos] access.
    if pos >= len(data):
        pos = len(data) - 1
    lowest = max(0, pos_end)
    while pos >= lowest:
        if data[pos] not in restricted:
            return pos
        pos -= 1
    return len(data)
43
44
def skip_rstring(data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """Return the position right after the R-string whose opening quote is at `pos`.

    `pos` is returned unchanged when data[pos] is not a double quote or
    the quote is not directly preceded by 'R'.

    Raises:
        RuntimeError: if the opening '(' cannot be found, or the closing
            ')<delimiter>"' sequence is missing or starts at/after
            min(len(data), pos_end).
    """
    at_quote = data[pos] == '"'
    if not at_quote or pos < 1:
        return pos

    # With data[pos] known to be '"', an R-string prefix is just an 'R' before it.
    if data[pos - 1] != "R":
        return pos

    open_paren = data.find("(", pos)
    if open_paren == -1:
        raise RuntimeError("Error while finding start of R-string.")

    # The text between the quote and '(' is the delimiter that must be
    # repeated between ')' and the closing quote.
    terminator = ")" + data[pos + 1 : open_paren] + '"'
    terminator_pos = data.find(terminator, open_paren)
    limit = min(len(data), pos_end)

    if terminator_pos == -1 or terminator_pos >= limit:
        raise RuntimeError("Error while finding end of R-string.")

    return terminator_pos + len(terminator)
64
65
def skip_string(data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """Return the position right after the quoted literal that starts at `pos`.

    If data[pos] is neither a single nor a double quote, `pos` is returned
    unchanged. Otherwise the matching closing quote of the same kind is
    searched for, ignoring quotes that are escaped with a backslash.

    Raises:
        RuntimeError: if no closing quote is found before
            min(len(data), pos_end).
    """
    if data[pos] not in "'\"":
        return pos

    quote = data[pos]
    opening_pos = pos
    limit = min(len(data), pos_end)
    pos = data.find(quote, opening_pos + 1)

    # Skip escaped quotes. A quote is escaped only when it is preceded by an
    # odd number of consecutive backslashes: `\"` is escaped, `\\"` is not,
    # `\\\"` is escaped again, and so on.
    while 0 < pos < limit:
        backslashes = 0
        probe = pos - 1
        while probe > opening_pos and data[probe] == "\\":
            backslashes += 1
            probe -= 1
        if backslashes % 2 == 0:
            break
        pos = data.find(quote, pos + 1)

    if pos == -1 or pos >= limit:
        raise RuntimeError("Error while finding end of string.")
    return pos + 1
81
82
def find_first_of_characters(characters: str, data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """Return the index of the first character of `data` that is in `characters`.

    The scan covers indices from `pos` up to min(len(data), pos_end).
    Unless `characters` itself contains a quote, quoted literals and
    R-strings are skipped as a whole, so matches inside them are ignored.

    Returns len(data) when nothing is found (also when `pos` is -1).

    Raises:
        RuntimeError: propagated from skip_rstring / skip_string when a
            literal that has to be skipped is not terminated.
    """
    limit = min(len(data), pos_end)
    skip_literals = "'" not in characters and '"' not in characters

    while pos < limit and pos != -1:
        current = data[pos]
        # Skip strings (if we are not looking for quotes)
        if skip_literals and current in "'\"":
            after_rstring = skip_rstring(data, pos)
            if after_rstring != pos:
                # An R-string was consumed. Re-enter the loop so the bounds
                # check runs before data[pos] is read again: the R-string
                # may end exactly at the end of the data.
                pos = after_rstring
            else:
                pos = skip_string(data, pos)
            continue
        if current in characters:
            return pos
        pos += 1
    return len(data)
94
95
def rfind_first_of_characters(characters: str, data: str, pos: int, pos_end: int = 0) -> int:
    """Search backwards for the last character that is in `characters`.

    The scan starts at `pos` and walks down to max(0, pos_end); `pos_end`
    itself is included in the search. A `pos` at or beyond the end of
    `data` is clamped to the last valid index. Unlike the forward search,
    quoted literals are not skipped.

    Returns the index of the character found, or len(data) when no such
    character exists in the scanned range.
    """
    # `>=` (not `>`): pos == len(data) is already out of range and would
    # otherwise raise IndexError on the first data[pos] access.
    if pos >= len(data):
        pos = len(data) - 1
    lowest = max(0, pos_end)
    while pos >= lowest:
        if data[pos] in characters:
            return pos
        pos -= 1
    return len(data)
105
106
def is_operator(data: str, current_pos: int) -> bool:
    """Tell whether the text right before `current_pos` is the word "operator".

    Returns True when data[current_pos - 8 : current_pos] equals "operator"
    and that word is not the tail of a longer identifier, i.e. it either
    starts the data or is preceded by a character that is not a letter,
    digit or underscore. Returns False otherwise, including for positions
    too small (or negative) to have the word in front of them.
    """
    keyword = "operator"
    keyword_start = current_pos - len(keyword)
    if keyword_start < 0:
        return False
    if data[keyword_start:current_pos] != keyword:
        return False
    if keyword_start == 0:
        # The keyword opens the data, nothing can precede it.
        return True
    preceding = data[keyword_start - 1]
    # Reject identifiers that merely end with "operator" (e.g. "my_operator").
    return not (preceding.isalnum() or preceding == "_")
111
112
def find_scope_borders(data: str, start: int = 0, opening_bracket: str = "{") -> Tuple[int, int]:
    """
    Returns tuple of positions of opening and closing brackets in 'data'.
    Raises RuntimeError if can't find scope borders.

    The search for the opening bracket begins at `start`. When
    `opening_bracket` is empty ("ANY mode"), the first of "(", "{", "<", "["
    found outside string literals is used. Brackets that directly follow
    the word "operator" are not treated as scope openers. The closing
    bracket is the one that balances the opening one; brackets inside
    string literals are ignored while balancing.

    A non-empty `opening_bracket` that is not one of the four supported
    brackets leads to a KeyError from the bracket lookup.
    """
    brackets_match: Dict[str, str] = {"{": "}", "(": ")", "<": ">", "[": "]"}
    opening = opening_bracket
    start_of_scope = start

    if opening:
        start_of_scope = data.find(opening, start)
        # Step over brackets that belong to an operator name. The bounds
        # check stops the loop once the search ran off the end of the data
        # (find_first_of_characters reports that as len(data)).
        while 0 <= start_of_scope < len(data) and is_operator(data, start_of_scope):
            start_of_scope = find_first_not_restricted_character(opening, data, start_of_scope + 1)
            start_of_scope = find_first_of_characters(opening, data, start_of_scope)
    else:
        # ANY mode: the position found here already skips string literals
        # and operator brackets, so it is kept as the scope start instead
        # of being searched for again.
        while not opening:
            start_of_scope = find_first_of_characters("({<[", data, start_of_scope)
            if start_of_scope == len(data):
                raise RuntimeError("No opening bracket found in ANY mode")
            if is_operator(data, start_of_scope):
                start_of_scope = find_first_not_restricted_character(data[start_of_scope], data, start_of_scope + 1)
            else:
                opening = data[start_of_scope]

    # str.find reports "not found" as -1, the helper functions as len(data).
    if start_of_scope == -1 or start_of_scope >= len(data):
        raise RuntimeError("No opening bracket found")

    current_pos = start_of_scope
    closing = brackets_match[opening]
    # Number of currently unclosed opening brackets; the scope ends at 0.
    bracket_sequence_sum = 1

    while bracket_sequence_sum != 0:
        current_pos = find_first_of_characters(f"{opening}{closing}", data, current_pos + 1)
        if current_pos == len(data):
            raise RuntimeError("Error while finding end of scope.")
        if data[current_pos] == opening:
            bracket_sequence_sum += 1
        elif data[current_pos] == closing:
            bracket_sequence_sum -= 1

    return start_of_scope, current_pos
154
155
def smart_split_by(data: str, delim: str = ",") -> list:
    """Split `data` into trimmed, non-empty pieces separated by `delim` characters.

    Delimiter positions are located with smart_find_first_of_characters.
    Each piece is stripped of spaces and newlines; an empty piece is not
    added to the result and is reported through debug_log instead. Runs
    of delimiter characters, spaces and newlines between pieces are
    skipped as a whole.
    """
    blanks = " \n"
    separators = f"{delim} \n"
    text = data.strip(blanks)

    pieces = []
    cursor = 0
    while cursor < len(text):
        delim_pos = smart_find_first_of_characters(delim, text, cursor)

        piece = text[cursor:delim_pos].strip(blanks)
        if piece:
            pieces.append(piece)
        else:
            debug_log("Empty segment in smart_split_by.")

        # Jump to the first character of the next piece.
        cursor = find_first_not_restricted_character(separators, text, delim_pos)

    return pieces
175
176
def _find_closing_quote(data: str, open_pos: int) -> int:
    """Return the index of the unescaped quote closing the literal opened at `open_pos`, or -1."""
    quote = data[open_pos]
    candidate = data.find(quote, open_pos + 1)
    while candidate != -1:
        # A quote is escaped when an odd number of backslashes precedes it.
        backslashes = 0
        probe = candidate - 1
        while probe > open_pos and data[probe] == "\\":
            backslashes += 1
            probe -= 1
        if backslashes % 2 == 0:
            return candidate
        candidate = data.find(quote, candidate + 1)
    return -1


def smart_find_first_of_characters(characters: str, data: str, pos: int) -> int:
    """Return the index of the first top-level character of `data` that is in `characters`.

    Scanning starts at `pos`. Bracketed scopes opened by "<", "(", "{" or
    "[" and literals in single or double quotes are jumped over as a
    whole, so matches inside them are ignored. A character from
    `characters` is matched before any bracket/quote handling, so
    brackets and quotes can themselves be searched for.

    Returns len(data) when nothing is found.

    Raises:
        RuntimeError: if a quoted literal is not terminated, or propagated
            from find_scope_borders when a bracketed scope is not closed.
    """
    i = pos
    while i < len(data):
        if data[i] in characters:
            return i

        if data[i] in "<({[":
            _, close_bracket = find_scope_borders(data, i, "")
            i = close_bracket

        elif data[i] in "'\"":
            i = _find_closing_quote(data, i)
            if i == -1:
                # Without this check the scan would restart from index 0
                # and could loop forever or report a match before `pos`.
                raise RuntimeError("Error while finding end of string.")

        i += 1

    return len(data)
198
199
def check_cpp_name(data: str) -> bool:
    """Return True when `data` contains none of the forbidden punctuation or space characters.

    The check is done on the lower-cased text with
    find_first_of_characters, which reports "nothing found" as the length
    of the searched text.
    """
    lowered = data.lower()
    first_forbidden = find_first_of_characters(" ~!@#$%^&*()-+=[]\\{}|;:'\",./<>?", lowered)
    return first_forbidden == len(lowered)
204