#!/usr/bin/env python3
# coding=utf-8
#
# Copyright (c) 2024-2025 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Module provides custom text tools for parsing."""

from typing import Tuple, Dict, List
from log_tools import debug_log


MAX_LEN = 10000000


def find_first_not_restricted_character(restricted: str, data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """
    Returns the index of the first character in data[pos:pos_end] that is not in 'restricted'.
    Returns len(data) if there is no such character.
    """
    for i in range(pos, min(len(data), pos_end)):
        if data[i] not in restricted:
            return i
    return len(data)


def rfind_first_not_restricted_character(restricted: str, data: str, pos: int, pos_end: int = 0) -> int:
    """
    Searches backwards from 'pos' down to 'pos_end' (pos_end is included in the search)
    for a character that is not in 'restricted'.
    Returns its index, or len(data) if there is no such character.
    """
    # Clamp to the last valid index; 'pos == len(data)' must be clamped too,
    # otherwise data[pos] below is out of range.
    if pos >= len(data):
        pos = len(data) - 1
    while pos >= max(0, pos_end):
        if data[pos] not in restricted:
            return pos
        pos -= 1
    return len(data)


def skip_rstring(data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """
    Returns next position after R-string, start pos if no R-string found.
    'pos' is expected to point at the opening double quote that follows the 'R' prefix.
    Raises RuntimeError if the R-string has no opening '(' or no terminator before 'pos_end'.
    """
    if pos < 1 or pos >= len(data) or data[pos] != '"' or data[pos - 1] != "R":
        return pos

    start_of_string_data = data.find("(", pos)
    if start_of_string_data == -1:
        raise RuntimeError("Error while finding start of R-string.")

    # Everything between the quote and '(' is the custom delimiter, so the
    # literal is terminated by ')' + delimiter + '"'.
    terminator = f'){data[pos + 1 : start_of_string_data]}"'
    end_of_string_data = data.find(terminator, start_of_string_data)

    if end_of_string_data == -1 or end_of_string_data >= min(len(data), pos_end):
        raise RuntimeError("Error while finding end of R-string.")

    return end_of_string_data + len(terminator)


def _find_closing_quote(data: str, pos: int, limit: int) -> int:
    """
    Returns the index (below 'limit') of the unescaped quote that closes the literal
    opened by the quote character at data[pos], or -1 if there is none.
    """
    quote = data[pos]
    candidate = data.find(quote, pos + 1, limit)
    while candidate != -1:
        # A quote is escaped only when preceded by an odd number of backslashes:
        # in '\\"' the backslash itself is escaped and the quote closes the literal.
        backslashes = 0
        probe = candidate - 1
        while probe > pos and data[probe] == "\\":
            backslashes += 1
            probe -= 1
        if backslashes % 2 == 0:
            return candidate
        candidate = data.find(quote, candidate + 1, limit)
    return -1


def skip_string(data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """
    Returns next position after the string or char literal that starts at 'pos'.
    Returns 'pos' unchanged if data[pos] is not a quote (or 'pos' is past the end of data).
    Raises RuntimeError if the closing quote is not found before 'pos_end'.
    """
    if pos >= len(data) or data[pos] not in "'\"":
        return pos

    closing_quote = _find_closing_quote(data, pos, min(len(data), pos_end))
    if closing_quote == -1:
        raise RuntimeError("Error while finding end of string.")
    return closing_quote + 1


def find_first_of_characters(characters: str, data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """
    Returns the index of the first character from 'characters' in data[pos:pos_end].
    String, char and R-string literals are skipped unless 'characters' contains a quote.
    Returns len(data) if nothing is found.
    Raises RuntimeError if a literal that has to be skipped is not terminated.
    """
    limit = min(len(data), pos_end)
    skip_literals = "'" not in characters and '"' not in characters

    while pos < limit and pos != -1:
        current = data[pos]
        # Skip strings (if we are not looking for quotes)
        if skip_literals and current in "'\"":
            after_literal = skip_rstring(data, pos)
            if after_literal == pos:
                # Not an R-string: skip it as an ordinary literal.
                after_literal = skip_string(data, pos)
            pos = after_literal
            continue
        if current in characters:
            return pos
        pos += 1
    return len(data)


def rfind_first_of_characters(characters: str, data: str, pos: int, pos_end: int = 0) -> int:
    """
    Searches backwards from 'pos' down to 'pos_end' (pos_end is included in the search)
    for a character from 'characters'.
    Returns its index, or len(data) if there is no such character.
    """
    # Clamp to the last valid index; 'pos == len(data)' must be clamped too,
    # otherwise data[pos] below is out of range.
    if pos >= len(data):
        pos = len(data) - 1
    while pos >= max(0, pos_end):
        if data[pos] in characters:
            return pos
        pos -= 1
    return len(data)


def is_operator(data: str, current_pos: int) -> bool:
    """
    Returns True if the text right before 'current_pos' is the standalone keyword 'operator',
    i.e. the character at 'current_pos' belongs to an operator name (operator<, operator(), ...).
    """
    keyword = "operator"
    keyword_start = current_pos - len(keyword)
    if keyword_start < 0 or data[keyword_start:current_pos] != keyword:
        return False
    if keyword_start == 0:
        # Keyword sits at the very beginning of data, nothing can precede it.
        return True
    # Reject identifiers that merely end with 'operator' (my_operator, x1operator).
    previous = data[keyword_start - 1]
    return not (previous.isalnum() or previous == "_")


def find_scope_borders(data: str, start: int = 0, opening_bracket: str = "{") -> Tuple[int, int]:
    """
    Returns tuple of positions of opening and closing brackets in 'data'.
    If 'opening_bracket' is empty (ANY mode), the first bracket of any kind found after 'start' is used.
    Brackets that are part of an operator name (operator<, operator(), ...) are skipped.
    Raises RuntimeError if can't find scope borders.
    """
    brackets_match: Dict[str, str] = {"{": "}", "(": ")", "<": ">", "[": "]"}
    opening = opening_bracket
    data_len = len(data)

    if opening:
        start_of_scope = data.find(opening, start)
        if start_of_scope == -1:
            raise RuntimeError("No opening bracket found")

        # The bound on data_len prevents endless looping when data ends with
        # 'operator' and there are no more brackets to look at.
        while start_of_scope < data_len and is_operator(data, start_of_scope):
            start_of_scope = find_first_not_restricted_character(opening, data, start_of_scope + 1)
            start_of_scope = find_first_of_characters(opening, data, start_of_scope)

        if start_of_scope >= data_len:
            raise RuntimeError("No opening bracket found")
    else:
        # ANY mode: the position found here already skips string literals and
        # operator names, so it is used directly as the scope start.
        start_of_scope = start
        while not opening:
            start_of_scope = find_first_of_characters("({<[", data, start_of_scope)
            if start_of_scope == data_len:
                raise RuntimeError("No opening bracket found in ANY mode")
            if is_operator(data, start_of_scope):
                start_of_scope = find_first_not_restricted_character(data[start_of_scope], data, start_of_scope + 1)
            else:
                opening = data[start_of_scope]

    current_pos = start_of_scope
    closing = brackets_match[opening]
    both_brackets = f"{opening}{closing}"
    bracket_sequence_sum = 1

    # Walk through nested brackets of the same kind until the balance returns to zero.
    while bracket_sequence_sum != 0:
        current_pos = find_first_of_characters(both_brackets, data, current_pos + 1)
        if current_pos == data_len:
            raise RuntimeError("Error while finding end of scope.")
        if data[current_pos] == opening:
            bracket_sequence_sum += 1
        elif data[current_pos] == closing:
            bracket_sequence_sum -= 1

    return start_of_scope, current_pos


def smart_split_by(data: str, delim: str = ",") -> List[str]:
    """
    Splits 'data' by any character of 'delim', ignoring delimiters inside brackets and quoted literals.
    Segments are stripped of spaces and newlines; empty segments are logged and dropped.
    """
    data = data.strip(" \n")

    res = []
    segment_start = 0
    separators = f"{delim} \n"

    while segment_start < len(data):
        next_delim = smart_find_first_of_characters(delim, data, segment_start)

        segment = data[segment_start:next_delim].strip(" \n")
        if segment:
            res.append(segment)
        else:
            debug_log("Empty segment in smart_split_by.")

        # Jump over the delimiter and the whitespace that follows it.
        segment_start = find_first_not_restricted_character(separators, data, next_delim)

    return res


def smart_find_first_of_characters(characters: str, data: str, pos: int) -> int:
    """
    Returns the index of the first character from 'characters' found at or after 'pos'
    that is not inside a bracket scope or a quoted literal.
    Returns len(data) if nothing is found.
    Raises RuntimeError (from find_scope_borders) if a bracket scope is not closed.
    """
    data_len = len(data)
    i = pos
    while i < data_len:
        current = data[i]
        if current in characters:
            return i

        if current in "<({[":
            _, close_bracket = find_scope_borders(data, i, "")
            i = close_bracket + 1
        elif current in "'\"":
            closing_quote = _find_closing_quote(data, i, data_len)
            # An unterminated quote (e.g. digit separator 1'000) is treated as an
            # ordinary character instead of restarting the scan from the beginning.
            i = i + 1 if closing_quote == -1 else closing_quote + 1
        else:
            i += 1

    return data_len


def check_cpp_name(data: str) -> bool:
    """Returns True if 'data' contains none of the characters that are forbidden in a C++ name."""
    data = data.lower()
    forbidden_chars = " ~!@#$%^&*()-+=[]\\{}|;:'\",./<>?"
    return find_first_of_characters(forbidden_chars, data) == len(data)