#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# Copyright (c) 2025 Northeastern University
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from enum import Enum
from pathlib import Path
from typing import Optional, List, Dict, Any, Union
from typing import Set

from ohos.sbom.data.target import Target
from ohos.sbom.sbom.metadata.sbom_meta_data import RelationshipType


class FileType(Enum):
    """File categories keyed by (lower-case, dot-less) file extension."""

    C_HEADER = 'h'
    C_SOURCE = 'c'
    CPP_SOURCE = 'cpp'
    CXX_SOURCE = 'cc'
    RUST_SOURCE = 'rs'

    SHARED_LIBRARY = 'so'
    STATIC_LIBRARY = 'a'
    WINDOWS_DLL = 'dll'
    WINDOWS_LIB = 'lib'
    RUST_STATIC_LIBRARY = 'rlib'

    TEXT = 'txt'
    JSON = 'json'

    HAP = 'hap'
    IPA = 'ipa'
    ZIP = 'zip'
    TAR = 'tar'
    GZ = 'gz'
    BZ2 = 'bz2'

    OBJECT_FILE = 'o'
    DEPENDENCY_FILE = 'd'

    ABC = 'abc'

    UNKNOWN = 'unknown'

    @property
    def is_shared_library(self) -> bool:
        """True for SHARED_LIBRARY and WINDOWS_DLL."""
        return self in {FileType.SHARED_LIBRARY, FileType.WINDOWS_DLL}

    @property
    def is_static_library(self) -> bool:
        """True for STATIC_LIBRARY, WINDOWS_LIB and RUST_STATIC_LIBRARY."""
        return self in {
            FileType.STATIC_LIBRARY,
            FileType.WINDOWS_LIB,
            FileType.RUST_STATIC_LIBRARY,
        }

    @property
    def is_source_code(self) -> bool:
        """True for C/C++/Rust sources and C headers."""
        return self in {
            FileType.C_SOURCE,
            FileType.CPP_SOURCE,
            FileType.CXX_SOURCE,
            FileType.RUST_SOURCE,
            FileType.C_HEADER,
        }

    @property
    def is_intermediate(self) -> bool:
        """True for OBJECT_FILE and DEPENDENCY_FILE."""
        return self in {FileType.OBJECT_FILE, FileType.DEPENDENCY_FILE}

    @property
    def is_data_file(self) -> bool:
        """True for TEXT and JSON."""
        return self in {FileType.TEXT, FileType.JSON}

    @property
    def is_package(self) -> bool:
        """True for HAP, IPA, ZIP, TAR, GZ and BZ2."""
        return self in {
            FileType.HAP, FileType.IPA, FileType.ZIP,
            FileType.TAR, FileType.GZ, FileType.BZ2,
        }

    @property
    def is_bytecode(self) -> bool:
        """True only for ABC."""
        return self is FileType.ABC

    @property
    def is_library(self) -> bool:
        """True when the type is a static or a shared library."""
        return self.is_static_library or self.is_shared_library

    @property
    def is_object_file(self) -> bool:
        """True only for OBJECT_FILE."""
        return self is FileType.OBJECT_FILE


class File:
    """Represents a file with dependencies and build relationships"""

    __slots__ = ('_relative_path', '_source_target', '_dependencies', '_file_type')

    def __init__(
            self,
            relative_path: str,
            source_target: Optional['Target'],
            file_type: Optional['FileType'] = None
    ):
        """
        Create a file record.

        Args:
            relative_path: Path of the file; also used to infer the file type
                when ``file_type`` is not given.
            source_target: Target that owns the file, or None.
            file_type: Explicit file type. When None, the type is derived
                from the extension of ``relative_path``.
        """
        self._relative_path = relative_path
        self._source_target = source_target

        self._file_type = file_type if file_type is not None else self._determine_file_type()

        # One (initially empty) bucket per relationship type.
        self._dependencies = {
            relationship: set()
            for relationship in RelationshipType
        }

    @property
    def relative_path(self) -> str:
        """Path passed to the constructor."""
        return self._relative_path

    @property
    def source_target(self) -> Optional['Target']:
        """Owning target passed to the constructor (may be None)."""
        return self._source_target

    @property
    def is_final_artifact(self) -> bool:
        """True for shared libraries, bytecode files and packages."""
        return (self.is_shared_library or
                self.is_bytecode or
                self.is_package)

    @property
    def is_stripped(self) -> bool:
        """True when the relative path does not contain ``unstripped/``."""
        return "unstripped/" not in self.relative_path

    @property
    def is_source_code(self) -> bool:
        return self._file_type is not None and self._file_type.is_source_code

    @property
    def is_object_file(self) -> bool:
        return self._file_type is FileType.OBJECT_FILE

    @property
    def is_intermediate(self) -> bool:
        return self._file_type is not None and self._file_type.is_intermediate

    @property
    def is_bytecode(self) -> bool:
        return self._file_type is FileType.ABC

    @property
    def is_library(self) -> bool:
        return self._file_type is not None and self._file_type.is_library

    @property
    def is_static_library(self) -> bool:
        return self._file_type is not None and self._file_type.is_static_library

    @property
    def is_shared_library(self) -> bool:
        return self._file_type is not None and self._file_type.is_shared_library

    @property
    def is_data_file(self) -> bool:
        return self._file_type is not None and self._file_type.is_data_file

    @property
    def is_package(self) -> bool:
        return self._file_type is not None and self._file_type.is_package

    def add_dependency(self, dep_type: RelationshipType, target: 'File') -> None:
        """
        Record ``target`` as a dependency of this file under ``dep_type``.

        Raises:
            ValueError: If ``dep_type`` is not a known dependency bucket.
        """
        if dep_type not in self._dependencies:
            raise ValueError(f"Invalid dependency type: {dep_type}")

        self._dependencies[dep_type].add(target)

    def add_dependency_list(self, dep_type: RelationshipType, file_list: List['File']) -> None:
        """Record every file of ``file_list`` under ``dep_type``."""
        for file in file_list:
            self.add_dependency(dep_type, file)

    def get_dependencies(
            self,
            dep_type: Optional[RelationshipType] = None
    ) -> Union[Set['File'], Dict[RelationshipType, Set['File']]]:
        """
        Return the dependencies of this file.

        Args:
            dep_type: When given, only the set stored for that relationship
                is returned (an empty set if there is none). When None, the
                whole relationship -> set mapping is returned.
        """
        # Compare against None explicitly so that a falsy enum member is
        # still treated as a real relationship type.
        if dep_type is not None:
            return self._dependencies.get(dep_type, set())
        return self._dependencies

    def set_dependencies(self, dep_type: RelationshipType, file_list: Set['File']):
        """Replace the dependency set of ``dep_type`` with a copy of ``file_list``."""
        self._dependencies[dep_type] = set(file_list)

    def add_dependency_by_file_type(self, file: 'File') -> None:
        """
        Record ``file`` under the relationship inferred from both file types.

        If the relationship cannot be determined, the error is printed and
        the dependency is recorded as DEPENDS_ON (best-effort behaviour).
        """
        try:
            dependency_type = self._determine_dependency_type(file)
        except Exception as e:
            print(f"Error occurred when adding dependency: {e}")
            dependency_type = RelationshipType.DEPENDS_ON
        self.add_dependency(dependency_type, file)

    def add_dependency_list_by_file_type(self, file_list: List['File']) -> None:
        """Apply :meth:`add_dependency_by_file_type` to each file of ``file_list``."""
        for file in file_list:
            self.add_dependency_by_file_type(file)

    def get_file_type_name(self) -> Optional[str]:
        """Return the lower-cased enum member name of the file type, or None."""
        if self._file_type:
            return self._file_type.name.lower()
        return None

    def to_dict(self) -> Dict[str, Any]:
        """
        Build a plain-dict view of this file.

        Dependencies are emitted as lists of relative paths keyed by the
        relationship type's ``value``; the owning target contributes its
        ``target_name`` and ``type`` attributes when present.
        """
        source_target_name = None
        source_target_type = None
        if self._source_target is not None:
            source_target_name = getattr(self._source_target, 'target_name', None)
            source_target_type = getattr(self._source_target, 'type', None)

        dependencies = {
            dep_type.value: [f.relative_path for f in file_set if hasattr(f, 'relative_path')]
            for dep_type, file_set in self._dependencies.items()
        }

        return {
            "source_target_type": source_target_type,
            "file_type": self.get_file_type_name(),
            "relative_path": self.relative_path,
            "source_target": source_target_name,
            "dependencies": dependencies
        }

    def _determine_file_type(self) -> Optional[FileType]:
        """
        Determines the file type based on the file extension and special naming patterns.

        Returns:
            The corresponding FileType enum value, or None if path is invalid.
        """
        if not self._relative_path:
            return None

        path = Path(self._relative_path)
        ext = path.suffix.lower().lstrip('.')

        # Special case: compressed archives such as .tar.gz, .tar.bz2, .zip.gz
        # are classified by the inner extension.
        if ext in ('gz', 'bz2'):
            stem_ext = path.stem.split('.')[-1].lower()
            if stem_ext == 'tar':
                return FileType.TAR
            if stem_ext == 'zip':
                return FileType.ZIP
            # Plain .gz / .bz2 files fall through to the generic lookup.

        # Generic lookup: the enum values are the known extensions.
        try:
            return FileType(ext)
        except ValueError:
            return FileType.UNKNOWN

    def _determine_dependency_type(self, file: 'File') -> RelationshipType:
        """Determine the appropriate relationship type based on self and file types."""
        # source_target is optional; read its type defensively so that files
        # without an owning target do not raise AttributeError here.
        target_type = getattr(self._source_target, 'type', None)
        if self.is_shared_library or target_type == 'executable':
            return self._handle_shared_library_or_executable_dep(file)
        if self.is_static_library:
            return self._handle_static_library_dep(file)
        if self.is_data_file:
            return self._handle_data_file_dep(file)
        if self.is_package:
            return self._handle_package_dep(file)
        if self.is_intermediate:
            return self._handle_intermediate_file_dep(file)
        if self.is_bytecode:
            return self._handle_bytecode_file_dep(file)
        return RelationshipType.DEPENDS_ON

    def _handle_shared_library_or_executable_dep(self, file: 'File') -> RelationshipType:
        """Handle deps for shared libraries or files from executables."""
        if file.is_static_library:
            return RelationshipType.STATIC_LINK
        if file.is_shared_library:
            return RelationshipType.DYNAMIC_LINK
        if file.is_source_code or file.is_intermediate:
            return RelationshipType.GENERATED_FROM
        return RelationshipType.DEPENDS_ON

    def _handle_static_library_dep(self, file: 'File') -> RelationshipType:
        """Handle deps for static libraries."""
        if file.is_shared_library or file.is_static_library:
            return RelationshipType.DEPENDS_ON
        if file.is_source_code or file.is_intermediate:
            return RelationshipType.GENERATED_FROM
        return RelationshipType.DEPENDS_ON

    def _handle_data_file_dep(self, file: 'File') -> RelationshipType:
        """Handle deps for data files."""
        if file.is_source_code or file.is_intermediate:
            return RelationshipType.GENERATED_FROM
        return RelationshipType.DEPENDS_ON

    def _handle_package_dep(self, file: 'File') -> RelationshipType:
        """Handle deps for package files (always GENERATED_FROM)."""
        return RelationshipType.GENERATED_FROM

    def _handle_intermediate_file_dep(self, file: 'File') -> RelationshipType:
        """Handle deps for intermediate files (e.g., .o)."""
        if file.is_source_code:
            return RelationshipType.GENERATED_FROM
        if file.is_intermediate and file.relative_path.endswith('.o'):
            return RelationshipType.OTHER
        return RelationshipType.DEPENDS_ON

    def _handle_bytecode_file_dep(self, file: 'File') -> RelationshipType:
        """Handle deps for bytecode files."""
        if file.is_source_code or file.is_intermediate:
            return RelationshipType.GENERATED_FROM
        return RelationshipType.DEPENDS_ON