• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# Copyright (c) 2024 Huawei Device Co., Ltd.
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import os
17import json
18import argparse
19import logging
20from typing import List, Dict, Optional
21
# Keys that every component entry of a README.OpenSource file must provide.
REQUIRED_FIELDS = [
    "Name", "License", "License File", "Version Number",
    "Owner", "Upstream URL", "Description",
]
31
32
class OpenSourceValidator:
    """Validate README.OpenSource metadata files found under a project tree.

    Format validation checks that a file is a JSON array of objects that
    carry every key listed in ``REQUIRED_FIELDS``. Content validation
    compares selected fields against reference data and checks that the
    referenced license files exist. All findings are reported through the
    root logger.
    """

    def __init__(
            self,
            project_root: str,
            log_file: Optional[str] = None,
            reference_data: Optional[List[Dict[str, str]]] = None
    ):
        """Store the configuration and set up logging.

        Args:
            project_root: Directory that is searched recursively for
                README.OpenSource files.
            log_file: Path of a log file; when empty or None, log records go
                to the default stream handler instead.
            reference_data: Known-good component records used by content
                validation; defaults to an empty list.
        """
        self.project_root = project_root
        self.reference_data = reference_data or []
        self.log_file = log_file

        # basicConfig treats a falsy filename as "use a stream handler", so a
        # single call covers both the file and the console case.
        logging.basicConfig(
            filename=self.log_file or None,
            level=logging.INFO,
            format="%(asctime)s - %(levelname)s - %(message)s",
        )

    def find_all_readmes(self) -> List[str]:
        """Return the paths of all README.OpenSource files under the root."""
        return [
            os.path.join(dirpath, "README.OpenSource")
            for dirpath, _, filenames in os.walk(self.project_root)
            if "README.OpenSource" in filenames
        ]

    @staticmethod
    def _component_format_errors(index: int, component: object) -> List[str]:
        """Return the format problems of one component (1-based ``index``)."""
        if not isinstance(component, dict):
            # Without this guard, `field not in component` would be a
            # substring test on strings and a TypeError on numbers.
            return [f"Component {index} must be a JSON object."]

        errors = [
            f"Component {index} is missing required field: {field}"
            for field in REQUIRED_FIELDS
            if field not in component
        ]

        # "Dependencies" is optional, but when present it must be an array
        # of strings.
        if "Dependencies" in component:
            dependencies = component["Dependencies"]
            if not isinstance(dependencies, list):
                errors.append(f"Component {index} 'Dependencies' field must be an array.")
            else:
                errors.extend(
                    f"Component {index} 'Dependencies' contains a non-string value: {dep}"
                    for dep in dependencies
                    if not isinstance(dep, str)
                )
        return errors

    def validate_format(self, readme_path: str) -> bool:
        """Check the structure and required fields of a README.OpenSource file.

        Every problem found is logged as an error, including read and parse
        failures, so a False result always comes with a reason in the log.

        Args:
            readme_path: Path of the file to check.

        Returns:
            True when the file is a JSON array whose entries are all
            well-formed, False otherwise.
        """
        errors = []
        try:
            with open(readme_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except json.JSONDecodeError as e:
            errors.append(f"JSON decode error: {e}")
        except Exception as e:
            # Per-file boundary: report the failure instead of raising.
            errors.append(f"Unexpected error: {e}")
        else:
            if not isinstance(data, list):
                errors.append("The file does not contain a JSON array.")
            else:
                for index, component in enumerate(data, start=1):
                    errors.extend(self._component_format_errors(index, component))

        if errors:
            for error in errors:
                logging.error(f"{readme_path}: {error}")
            return False

        logging.info(f"{readme_path} format is valid.")
        return True

    def load_reference_data(self, reference_data_path: str):
        """Load the reference data from a JSON file.

        Args:
            reference_data_path: Path of a JSON file holding an array of
                component records.

        Raises:
            ValueError: If the file cannot be read or parsed, or if its top
                level is not a JSON array.
        """
        try:
            with open(reference_data_path, "r", encoding='utf-8') as f:
                loaded = json.load(f)
        except Exception as e:
            raise ValueError(
                f"Failed to load reference data from '{reference_data_path}': {e}"
            ) from e

        # Reject other shapes here; iterating a dict or a string later would
        # fail far away from the actual cause.
        if not isinstance(loaded, list):
            raise ValueError(
                f"Failed to load reference data from '{reference_data_path}': "
                "top-level JSON value must be an array."
            )
        self.reference_data = loaded

    def find_reference_data(self, name: str) -> Optional[Dict[str, str]]:
        """Return the first reference record whose "Name" equals ``name``.

        Entries that are not JSON objects are skipped. Returns None when no
        record matches.
        """
        return next(
            (
                reference
                for reference in self.reference_data
                if isinstance(reference, dict) and reference.get("Name") == name
            ),
            None,
        )

    def _validate_component_content(self, readme_path: str, software_data: object) -> bool:
        """Check one component entry against the reference data."""
        if not isinstance(software_data, dict):
            logging.error(
                f"{readme_path}: Software entry is not a JSON object: {software_data!r}"
            )
            return False

        name = software_data.get("Name")
        if not name:
            logging.error(f"{readme_path}: Missing 'Name' field in software data.")
            return False

        reference_data = self.find_reference_data(name)
        if reference_data is None:
            logging.error(
                f"{readme_path}: Software '{name}' not found in reference data."
            )
            return False

        valid = True

        # These fields must match the reference record exactly.
        for field in ("Name", "License", "Version Number", "Upstream URL"):
            expected_value = reference_data.get(field)
            actual_value = software_data.get(field)
            if actual_value != expected_value:
                logging.error(
                    f"{readme_path}: Field '{field}' mismatch for '{name}'. Expected: '{expected_value}', Found: '{actual_value}'"
                )
                valid = False

        # Both checks always run so that every problem is reported.
        if not self.validate_license_file(readme_path, software_data.get("License File")):
            valid = False
        if not self.validate_dependencies(software_data.get("Dependencies"), readme_path):
            valid = False

        return valid

    def validate_content(self, readme_path: str) -> bool:
        """Compare a README.OpenSource file with the reference data.

        Each entry must name a component known to the reference data, match
        it on "Name", "License", "Version Number" and "Upstream URL", point
        at existing license files and carry a well-formed "Dependencies"
        value. Read errors and malformed entries are logged and reported as
        a failure rather than raised.

        Args:
            readme_path: Path of the file to check.

        Returns:
            True when every entry passes, False otherwise.
        """
        try:
            with open(readme_path, "r", encoding='utf-8') as f:
                readme_data = json.load(f)
        except json.JSONDecodeError as e:
            logging.error(f"{readme_path}: JSON decode error: {e}")
            return False
        except (OSError, UnicodeDecodeError) as e:
            logging.error(f"{readme_path}: Failed to read file: {e}")
            return False

        if not isinstance(readme_data, list):
            logging.error(f"{readme_path}: JSON data is not an array.")
            return False

        all_valid = True
        for software_data in readme_data:
            if not self._validate_component_content(readme_path, software_data):
                all_valid = False

        if all_valid:
            logging.info(f"{readme_path}: Content validation passed.")
        return all_valid

    def validate_license_file(self, readme_path: str, license_file: Optional[str]) -> bool:
        """Check that the license files named by an entry exist.

        Args:
            readme_path: Path of the README.OpenSource file; license paths
                are resolved relative to its directory.
            license_file: One path, or several separated by semicolons.

        Returns:
            True when every listed path exists, False when the value is
            missing, is not a string, names no path at all, or names a path
            that does not exist.
        """
        if not license_file:
            logging.error(f"{readme_path}: 'License File' field is missing.")
            return False

        if not isinstance(license_file, str):
            logging.error(
                f"{readme_path}: 'License File' field must be a string, "
                f"got {type(license_file).__name__}."
            )
            return False

        # An empty segment (stray or trailing ';') would join to the README
        # directory itself, which always exists, so drop those first.
        candidates = [part.strip() for part in license_file.split(';')]
        candidates = [part for part in candidates if part]
        if not candidates:
            logging.error(f"{readme_path}: 'License File' field contains no file paths.")
            return False

        readme_dir = os.path.dirname(readme_path)
        all_valid = True
        for candidate in candidates:
            license_file_path = os.path.join(readme_dir, candidate)
            if os.path.exists(license_file_path):
                logging.info(f"{readme_path}: License file '{candidate}' exists.")
            else:
                logging.error(
                    f"{readme_path}: License file '{candidate}' not found at: {license_file_path}"
                )
                all_valid = False

        return all_valid

    def validate_dependencies(self, dependencies: Optional[List[str]], readme_path: str) -> bool:
        """Check that a "Dependencies" value is an array of strings.

        Args:
            dependencies: The value to check; None means the field is
                absent, which is allowed.
            readme_path: Path used to prefix log messages.

        Returns:
            True when the value is None or a list containing only strings.
        """
        if dependencies is None:
            return True

        if not isinstance(dependencies, list):
            logging.error(f"{readme_path}: 'Dependencies' should be an array.")
            return False

        non_strings = [dep for dep in dependencies if not isinstance(dep, str)]
        if non_strings:
            # Report every offending value, not only the first one.
            for dep in non_strings:
                logging.error(f"{readme_path}: 'Dependencies' contains non-string value: {dep}")
            return False

        logging.info(f"{readme_path}: 'Dependencies' field is valid.")
        return True

    def run_validation(self, validate_format: bool = True, validate_content: bool = False) -> bool:
        """Validate every README.OpenSource file under the project root.

        Args:
            validate_format: Run the format check on each file.
            validate_content: Run the content check on each file; skipped
                for a file whose format check failed.

        Returns:
            True when at least one file was found and every requested check
            passed, False otherwise (including when an unexpected error
            interrupted the run).
        """
        try:
            readme_paths = self.find_all_readmes()
            if not readme_paths:
                logging.error("No README.OpenSource files found in the project directory.")
                return False

            all_passed = True
            for readme_path in readme_paths:
                logging.info(f"Validating: {readme_path}")
                if validate_format and not self.validate_format(readme_path):
                    logging.error(f"{readme_path}: Format validation failed.")
                    all_passed = False
                    continue  # content checks assume a well-formed file
                if validate_content and not self.validate_content(readme_path):
                    logging.error(f"{readme_path}: Content validation failed.")
                    all_passed = False

            logging.info("Validation process completed.")
            return all_passed

        except Exception as e:
            # Last-resort boundary so callers always get a result.
            logging.error(f"Validation failed: {e}")
            return False
237
238
def main():
    """Parse the command line and run the requested validations.

    ``--validate-content`` requires ``--reference-data``. When neither
    ``--validate-format`` nor ``--validate-content`` is given, format
    validation is run, so that invoking the tool with only a project root
    still checks something.
    """
    parser = argparse.ArgumentParser(
        description="Validate README.OpenSource files in a project."
    )
    parser.add_argument("project_root", help="The root directory of the project.")
    parser.add_argument(
        "--validate-format", action='store_true', help="Validate the format of README.OpenSource files."
    )
    parser.add_argument(
        "--validate-content", action='store_true', help="Validate the content of README.OpenSource files against reference data."
    )
    parser.add_argument(
        "--reference-data", help="Path to the reference data JSON file (required for content validation)."
    )
    parser.add_argument("--log-file", help="Path to the log file for validation results.")

    args = parser.parse_args()

    if args.validate_content and not args.reference_data:
        parser.error("--reference-data is required for content validation.")

    validator = OpenSourceValidator(
        project_root=args.project_root,
        log_file=args.log_file
    )

    if args.validate_content:
        try:
            validator.load_reference_data(args.reference_data)
        except ValueError as e:
            # Report an unusable reference file as a usage error instead of
            # letting the traceback reach the user.
            parser.error(str(e))

    # With both flags off every file would be walked but nothing checked;
    # fall back to the format check in that case.
    run_format = args.validate_format or not args.validate_content

    validator.run_validation(
        validate_format=run_format,
        validate_content=args.validate_content
    )
275
276
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
279