#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2024 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Validate README.OpenSource metadata files found under a project tree."""

import argparse
import json
import logging
import os
import sys
from typing import Any, Dict, List, Optional

REQUIRED_FIELDS = [
    "Name",
    "License",
    "License File",
    "Version Number",
    "Owner",
    "Upstream URL",
    "Description"
]

# Fields whose values must match the reference data exactly.
_COMPARED_FIELDS = ("Name", "License", "Version Number", "Upstream URL")

# Errors that opening/decoding/parsing a JSON file can raise. ValueError covers
# both json.JSONDecodeError and UnicodeDecodeError (they are subclasses).
_READ_ERRORS = (OSError, ValueError, RecursionError)


class OpenSourceValidator:
    """Locate README.OpenSource files and validate their format and content."""

    def __init__(
        self,
        project_root: str,
        log_file: Optional[str] = None,
        reference_data: Optional[List[Dict[str, str]]] = None
    ):
        """Store the configuration and set up root logging.

        Args:
            project_root: Directory that is searched recursively.
            log_file: If given, log records are written to this file;
                otherwise the default logging destination is used.
            reference_data: Reference entries used by content validation.
        """
        self.project_root = project_root
        self.reference_data = reference_data or []
        self.log_file = log_file

        # Configure logging; only the destination differs between the cases.
        log_config: Dict[str, Any] = {
            "level": logging.INFO,
            "format": "%(asctime)s - %(levelname)s - %(message)s",
        }
        if self.log_file:
            log_config["filename"] = self.log_file
        logging.basicConfig(**log_config)

    @staticmethod
    def _load_json(path: str) -> Any:
        """Read *path* as UTF-8 and return the parsed JSON document."""
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def find_all_readmes(self) -> List[str]:
        """Recursively collect the paths of all README.OpenSource files."""
        return [
            os.path.join(dirpath, "README.OpenSource")
            for dirpath, _, filenames in os.walk(self.project_root)
            if "README.OpenSource" in filenames
        ]

    def _collect_format_errors(self, readme_path: str) -> List[str]:
        """Return every format problem found in *readme_path* (empty if none)."""
        try:
            data = self._load_json(readme_path)
        except json.JSONDecodeError as e:
            return [f"JSON decode error: {e}"]
        except _READ_ERRORS as e:
            return [f"Unexpected error: {e}"]

        if not isinstance(data, list):
            return ["The file does not contain a JSON array."]

        errors: List[str] = []
        for idx, component in enumerate(data, start=1):
            # Membership tests on a non-dict would either raise or perform a
            # substring search, so reject such entries explicitly.
            if not isinstance(component, dict):
                errors.append(f"Component {idx} must be a JSON object.")
                continue

            errors.extend(
                f"Component {idx} is missing required field: {field}"
                for field in REQUIRED_FIELDS
                if field not in component
            )

            # "Dependencies" is optional, but when present it must be an
            # array of strings.
            if "Dependencies" not in component:
                continue
            dependencies = component["Dependencies"]
            if not isinstance(dependencies, list):
                errors.append(f"Component {idx} 'Dependencies' field must be an array.")
                continue
            errors.extend(
                f"Component {idx} 'Dependencies' contains a non-string value: {dep}"
                for dep in dependencies
                if not isinstance(dep, str)
            )
        return errors

    def validate_format(self, readme_path: str) -> bool:
        """Validate the structure and required fields of a README.OpenSource file.

        Every problem found is logged at error level (including unreadable
        files and invalid JSON).

        Returns:
            True if the file is a JSON array of objects that each contain all
            REQUIRED_FIELDS and a well-formed optional "Dependencies" field.
        """
        errors = self._collect_format_errors(readme_path)
        if errors:
            for error in errors:
                logging.error("%s: %s", readme_path, error)
            return False

        logging.info("%s format is valid.", readme_path)
        return True

    def load_reference_data(self, reference_data_path: str):
        """Load the reference data from a JSON file.

        Raises:
            ValueError: If the file cannot be read or parsed, or if it does
                not contain a JSON array.
        """
        try:
            data = self._load_json(reference_data_path)
        except (TypeError,) + _READ_ERRORS as e:
            raise ValueError(
                f"Failed to load reference data from '{reference_data_path}': {e}"
            ) from e

        if not isinstance(data, list):
            raise ValueError(
                f"Failed to load reference data from '{reference_data_path}': "
                "expected a JSON array."
            )
        self.reference_data = data

    def find_reference_data(self, name: str) -> Optional[Dict[str, str]]:
        """Return the first reference entry whose "Name" equals *name*, or None."""
        for reference in self.reference_data:
            # Skip malformed (non-object) entries instead of crashing on them.
            if isinstance(reference, dict) and reference.get("Name") == name:
                return reference
        return None

    def validate_content(self, readme_path: str) -> bool:
        """Validate the content of a README.OpenSource file against reference data.

        For each component this compares the fields in _COMPARED_FIELDS with
        the matching reference entry, checks that the license file(s) exist,
        and checks the optional "Dependencies" field.

        Returns:
            True only if every component in the file passes every check.
        """
        # Step 1: read the JSON file.
        try:
            readme_data = self._load_json(readme_path)
        except json.JSONDecodeError as e:
            logging.error("%s: JSON decode error: %s", readme_path, e)
            return False
        except _READ_ERRORS as e:
            logging.error("%s: Failed to read file: %s", readme_path, e)
            return False

        if not isinstance(readme_data, list):
            logging.error("%s: JSON data is not an array.", readme_path)
            return False

        # Step 2: validate each component entry in the array.
        all_valid = True
        for software_data in readme_data:
            if not self._validate_component(readme_path, software_data):
                all_valid = False

        if all_valid:
            logging.info("%s: Content validation passed.", readme_path)
        else:
            logging.error("%s: Content validation failed.", readme_path)
        return all_valid

    def _validate_component(self, readme_path: str, software_data: Any) -> bool:
        """Validate a single component entry; log each problem found."""
        if not isinstance(software_data, dict):
            logging.error("%s: Component entry is not a JSON object.", readme_path)
            return False

        name = software_data.get("Name")
        if not name:
            logging.error("%s: Missing 'Name' field in software data.", readme_path)
            return False

        reference_data = self.find_reference_data(name)
        if reference_data is None:
            logging.error(
                "%s: Software '%s' not found in reference data.", readme_path, name
            )
            return False

        valid = True

        # Compare the identifying fields with the reference entry.
        for field in _COMPARED_FIELDS:
            expected_value = reference_data.get(field)
            actual_value = software_data.get(field)
            if actual_value != expected_value:
                logging.error(
                    "%s: Field '%s' mismatch for '%s'. Expected: '%s', Found: '%s'",
                    readme_path, field, name, expected_value, actual_value,
                )
                valid = False

        # Check that the "License File" path(s) exist.
        if not self.validate_license_file(readme_path, software_data.get("License File")):
            valid = False

        # Check that the optional "Dependencies" field is well-formed.
        if not self.validate_dependencies(software_data.get("Dependencies"), readme_path):
            valid = False

        return valid

    def validate_license_file(self, readme_path: str, license_file: Optional[str]) -> bool:
        """Check that the license file(s) exist relative to the README's directory.

        *license_file* may list several paths separated by semicolons. Empty
        segments (for example from a trailing ';') are ignored, but at least
        one non-empty path is required.
        """
        if not license_file:
            logging.error("%s: 'License File' field is missing.", readme_path)
            return False
        if not isinstance(license_file, str):
            logging.error("%s: 'License File' field must be a string.", readme_path)
            return False

        # An empty segment would join to the README directory itself, which
        # always exists, so it must not count as a valid license path.
        entries = [part.strip() for part in license_file.split(';')]
        entries = [entry for entry in entries if entry]
        if not entries:
            logging.error("%s: 'License File' field is missing.", readme_path)
            return False

        readme_dir = os.path.dirname(readme_path)
        all_valid = True
        for entry in entries:
            license_file_path = os.path.join(readme_dir, entry)
            if os.path.exists(license_file_path):
                logging.info("%s: License file '%s' exists.", readme_path, entry)
            else:
                logging.error(
                    "%s: License file '%s' not found at: %s",
                    readme_path, entry, license_file_path,
                )
                all_valid = False

        return all_valid

    def validate_dependencies(self, dependencies: Optional[List[str]], readme_path: str) -> bool:
        """Check that *dependencies* is either absent or a list of strings."""
        if dependencies is None:
            return True  # Having no dependencies is valid.

        if not isinstance(dependencies, list):
            logging.error("%s: 'Dependencies' should be an array.", readme_path)
            return False

        for dep in dependencies:
            if not isinstance(dep, str):
                logging.error(
                    "%s: 'Dependencies' contains non-string value: %s", readme_path, dep
                )
                return False

        logging.info("%s: 'Dependencies' field is valid.", readme_path)
        return True

    def run_validation(self, validate_format: bool = True, validate_content: bool = False) -> bool:
        """Run the selected checks on every README.OpenSource file under the root.

        Content validation of a file is skipped when its format validation
        was requested and failed.

        Returns:
            True if at least one file was found and every requested check
            passed for every file; False otherwise.
        """
        try:
            readme_paths = self.find_all_readmes()
            if not readme_paths:
                logging.error("No README.OpenSource files found in the project directory.")
                return False

            all_passed = True
            for readme_path in readme_paths:
                logging.info("Validating: %s", readme_path)
                if validate_format and not self.validate_format(readme_path):
                    logging.error("%s: Format validation failed.", readme_path)
                    all_passed = False
                    continue  # Skip content validation when the format is invalid.
                # validate_content() logs its own pass/fail summary.
                if validate_content and not self.validate_content(readme_path):
                    all_passed = False

            logging.info("Validation process completed.")
            return all_passed

        except Exception as e:  # Top-level boundary: report and signal failure.
            logging.exception("Validation failed: %s", e)
            return False


def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to sys.argv[1:] when None.

    Returns:
        0 if validation passed, 1 if it failed, 2 if the reference data
        could not be loaded.
    """
    parser = argparse.ArgumentParser(
        description="Validate README.OpenSource files in a project."
    )
    parser.add_argument("project_root", help="The root directory of the project.")
    parser.add_argument(
        "--validate-format", action='store_true', help="Validate the format of README.OpenSource files."
    )
    parser.add_argument(
        "--validate-content", action='store_true', help="Validate the content of README.OpenSource files against reference data."
    )
    parser.add_argument(
        "--reference-data", help="Path to the reference data JSON file (required for content validation)."
    )
    parser.add_argument("--log-file", help="Path to the log file for validation results.")

    args = parser.parse_args(argv)

    if args.validate_content and not args.reference_data:
        parser.error("--reference-data is required for content validation.")

    # Create the validator.
    validator = OpenSourceValidator(
        project_root=args.project_root,
        log_file=args.log_file
    )

    if args.validate_content:
        # Load the reference data from the configuration file.
        try:
            validator.load_reference_data(args.reference_data)
        except ValueError as e:
            logging.error("%s", e)
            return 2

    # With no mode flag at all, fall back to format validation (the default
    # of run_validation) instead of silently checking nothing.
    run_format = args.validate_format or not args.validate_content

    # Run the validation.
    passed = validator.run_validation(
        validate_format=run_format,
        validate_content=args.validate_content
    )
    return 0 if passed else 1


if __name__ == "__main__":
    sys.exit(main())