1#!/usr/bin/env python3 2# SPDX-License-Identifier: MIT 3 4# Provide a markdown-formatted message summarizing the reasons why a pipeline failed. 5# Marge bot can use this script to provide more helpful comments when CI fails. 6# Example for running locally: 7# ./bin/ci/pipeline_message.sh --project-id 176 --pipeline-id 1310098 8 9 10import argparse 11import asyncio 12import logging 13from typing import Any 14 15import aiohttp 16 17PER_PAGE: int = 6000 18 19 20async def get_pipeline_status( 21 session: aiohttp.ClientSession, project_id: str, pipeline_id: str 22): 23 url = f"https://gitlab.freedesktop.org/api/v4/projects/{project_id}/pipelines/{pipeline_id}" 24 logging.info(f"Fetching pipeline status from {url}") 25 async with session.get(url) as response: 26 response.raise_for_status() 27 pipeline_details = await response.json() 28 return pipeline_details.get("status") 29 30 31async def get_jobs_for_pipeline( 32 session: aiohttp.ClientSession, project_id: str, pipeline_id: str 33): 34 url = f"https://gitlab.freedesktop.org/api/v4/projects/{project_id}/pipelines/{pipeline_id}/jobs" 35 logging.info(url) 36 jobs = [] 37 params = {"per_page": PER_PAGE} 38 async with session.get(url, params=params) as response: 39 response.raise_for_status() 40 jobs = await response.json() 41 return jobs 42 43 44def get_problem_jobs(jobs: list[dict[str, Any]]): 45 ignore_stage_list = [ 46 "postmerge", 47 "performance", 48 ] 49 problem_jobs = [] 50 for job in jobs: 51 if any(ignore.lower() in job["stage"] for ignore in ignore_stage_list): 52 continue 53 if job["status"] in {"failed", "canceled"}: 54 problem_jobs.append(job) 55 return problem_jobs 56 57 58def unexpected_improvements(failed_test_array): 59 if failed_test_array["unexpected_improvements"]: 60 unexpected_improvements_count = len( 61 failed_test_array["unexpected_improvements"] 62 ) 63 return f" {unexpected_improvements_count} improved test{'s' if unexpected_improvements_count != 1 else ''}" 64 return "" 65 66 67def fails(failed_test_array): 68 if failed_test_array["fails"]: 69 fails_count = len(failed_test_array["fails"]) 70 return f" {fails_count} failed test{'s' if fails_count != 1 else ''}" 71 return "" 72 73 74def crashes(failed_test_array): 75 if failed_test_array["crashes"]: 76 crash_count = len(failed_test_array["crashes"]) 77 return f" {crash_count} crashed test{'s' if crash_count != 1 else ''}" 78 return "" 79 80 81def get_failed_test_details(failed_test_array): 82 message = "" 83 max_tests_to_display = 5 84 85 if failed_test_array["unexpected_improvements"]: 86 for i, test in enumerate(failed_test_array["unexpected_improvements"]): 87 if i > max_tests_to_display: 88 message += " \nand more...<br>" 89 break 90 message += f"{test}<br>" 91 92 if failed_test_array["fails"]: 93 for i, test in enumerate(failed_test_array["fails"]): 94 if i > max_tests_to_display: 95 message += " \nand more...<br>" 96 break 97 message += f"{test}<br>" 98 99 if failed_test_array["crashes"]: 100 for i, test in enumerate(failed_test_array["crashes"]): 101 if i > max_tests_to_display: 102 message += " \nand more...<br>" 103 break 104 message += f"{test}<br>" 105 106 return message 107 108 109def get_failed_test_summary_message(failed_test_array): 110 summary_msg = "<summary>" 111 summary_msg += unexpected_improvements(failed_test_array) 112 summary_msg += fails(failed_test_array) 113 summary_msg += crashes(failed_test_array) 114 summary_msg += "</summary>" 115 return summary_msg 116 117 118def sort_failed_tests_by_status(failures_csv): 119 failed_test_array = { 120 "unexpected_improvements": [], 121 "fails": [], 122 "crashes": [], 123 "timeouts": [], 124 } 125 126 for test in failures_csv.splitlines(): 127 if "UnexpectedImprovement" in test: 128 failed_test_array["unexpected_improvements"].append(test) 129 elif "Fail" in test: 130 failed_test_array["fails"].append(test) 131 elif "Crash" in test: 132 failed_test_array["crashes"].append(test) 133 elif "Timeout" in test: 134 failed_test_array["timeouts"].append(test) 135 136 return failed_test_array 137 138 139async def get_failures_csv(session, project_id, job): 140 job_id = job["id"] 141 url = f"https://gitlab.freedesktop.org/api/v4/projects/{project_id}/jobs/{job_id}/artifacts/results/failures.csv" 142 async with session.get(url) as response: 143 if response.status == 200: 144 text = await response.text() 145 return text 146 else: 147 logging.debug(f"No response from: {url}") 148 return "" 149 150 151async def get_test_failures(session, project_id, job): 152 failures_csv = await get_failures_csv(session, project_id, job) 153 if not failures_csv: 154 return "" 155 156 # If just one test failed, don't bother with more complicated sorting 157 lines = failures_csv.splitlines() 158 if len(lines) == 1: 159 return ": " + lines[0] + "<br>" 160 161 failed_test_array = sort_failed_tests_by_status(failures_csv) 162 failures_msg = "<details>" 163 failures_msg += get_failed_test_summary_message(failed_test_array) 164 failures_msg += get_failed_test_details(failed_test_array) 165 failures_msg += "</details>" 166 167 return failures_msg 168 169 170async def get_trace_failures(session, project_id, job): 171 project_json = await get_project_json(session, project_id) 172 path = project_json.get("path", "") 173 if not path: 174 return "" 175 176 job_id = job["id"] 177 url = f"https://mesa.pages.freedesktop.org/-/{path}/-/jobs/{job_id}/artifacts/results/summary/problems.html" 178 async with session.get(url) as response: 179 if response.status == 200: 180 return url 181 else: 182 logging.debug(f"No response from: {url}") 183 return "" 184 185 186async def get_project_json(session, project_id): 187 url_project_id = f"https://gitlab.freedesktop.org/api/v4/projects/{project_id}" 188 async with session.get(url_project_id) as response: 189 if response.status == 200: 190 return await response.json() 191 else: 192 logging.debug(f"No response from: {url_project_id}") 193 return "" 194 195 196async def get_job_log(session: aiohttp.ClientSession, project_id: str, job_id: int): 197 project_json = await get_project_json(session, project_id) 198 path_with_namespace = project_json.get("path_with_namespace", "") 199 if not path_with_namespace: 200 return "" 201 202 url_job_log = ( 203 f"https://gitlab.freedesktop.org/{path_with_namespace}/-/jobs/{job_id}/raw" 204 ) 205 async with session.get(url_job_log) as response: 206 if response.status == 200: 207 return await response.text() 208 else: 209 logging.debug(f"No response from job log: {url_job_log}") 210 return "" 211 212 213async def search_job_log_for_errors(session, project_id, job): 214 log_error_message = "" 215 216 # Bypass these generic error messages in hopes of finding a more specific error. 217 # The entries are case insensitive. Keep them in alphabetical order and don't 218 # forget to add a comma after each entry 219 ignore_list = [ 220 "403: b", 221 "aborting", 222 "building c", 223 "continuing", 224 "error_msg : None", 225 "error_type", 226 "error generated", 227 "errors generated", 228 "exit code", 229 "exit status", 230 "exiting now", 231 "job failed", 232 "no_error", 233 "no files to upload", 234 "performing test", 235 "ret code", 236 "retry", 237 "retry-all-errors", 238 "strerror_", 239 "success", 240 "unknown-section", 241 ] 242 job_log = await get_job_log(session, project_id, job["id"]) 243 244 for line in reversed(job_log.splitlines()): 245 if "fatal" in line.lower(): 246 # remove date and formatting before fatal message 247 log_error_message = line[line.lower().find("fatal") :] 248 break 249 250 if "error" in line.lower(): 251 if any(ignore.lower() in line.lower() for ignore in ignore_list): 252 continue 253 254 # remove date and formatting before error message 255 log_error_message = line[line.lower().find("error") :].strip() 256 257 # if there is no further info after the word error then it's not helpful 258 # so reset the message and try again. 259 if log_error_message.lower() in {"error", "errors", "error:", "errors:"}: 260 log_error_message = "" 261 continue 262 break 263 264 # timeout msg from .gitlab-ci/lava/lava_job_submitter.py 265 if "expected to take at least" in line.lower(): 266 log_error_message = line 267 break 268 269 return log_error_message 270 271 272async def process_single_job(session, project_id, job): 273 job_url = job.get("web_url", "") 274 if not job_url: 275 logging.info(f"Job {job['name']} is missing a web_url") 276 277 job_name = job.get("name", "Unnamed Job") 278 message = f"[{job_name}]({job_url})" 279 280 # if a job times out it's cancelled, so worth mentioning here 281 if job["status"] == "canceled": 282 return f"{message}: canceled<br>" 283 284 # if it's not a script failure then all we can do is give the gitlab assigned reason 285 if job["failure_reason"] != "script_failure": 286 return f"{message}: {job['failure_reason']}<br>" 287 288 test_failures = await get_test_failures(session, project_id, job) 289 if test_failures: 290 return f"{message}{test_failures}" 291 292 trace_failures = await get_trace_failures(session, project_id, job) 293 if trace_failures: 294 return f"{message}: has a [trace failure]({trace_failures})<br>" 295 296 log_error_message = await search_job_log_for_errors(session, project_id, job) 297 if log_error_message: 298 return f"{message}: {log_error_message}<br>" 299 300 return f"{message}<br>" 301 302 303async def process_job_with_limit(session, project_id, job): 304 # Use at most 10 concurrent tasks 305 semaphore = asyncio.Semaphore(10) 306 async with semaphore: 307 return await process_single_job(session, project_id, job) 308 309 310async def process_problem_jobs(session, project_id, problem_jobs): 311 312 problem_jobs_count = len(problem_jobs) 313 314 if problem_jobs_count == 1: 315 message = f"<br>There were problems with job: " 316 message += await process_single_job(session, project_id, problem_jobs[0]) 317 return message 318 319 message = f"<details>" 320 message += f"<summary>" 321 message += f"There were problems with {problem_jobs_count} jobs: " 322 message += "</summary>" 323 324 tasks = [process_job_with_limit(session, project_id, job) for job in problem_jobs] 325 326 results = await asyncio.gather(*tasks) 327 328 for result in results: 329 message += result 330 331 message += f"</details>" 332 333 return message 334 335 336async def main(pipeline_id: str, project_id: str = "176") -> str: 337 338 message = "" 339 340 try: 341 timeout = aiohttp.ClientTimeout(total=120) 342 logging.basicConfig(level=logging.INFO) 343 344 async with aiohttp.ClientSession(timeout=timeout) as session: 345 pipeline_status = await get_pipeline_status( 346 session, project_id, pipeline_id 347 ) 348 logging.debug(f"Pipeline status: {pipeline_status}") 349 if pipeline_status != "failed": 350 return message 351 352 jobs = await get_jobs_for_pipeline(session, project_id, pipeline_id) 353 problem_jobs = get_problem_jobs(jobs) 354 355 if len(problem_jobs) == 0: 356 return message 357 358 message = await process_problem_jobs(session, project_id, problem_jobs) 359 except Exception as e: 360 logging.error(f"An error occurred: {e}") 361 return "" 362 363 return message 364 365 366if __name__ == "__main__": 367 parser = argparse.ArgumentParser(description="Fetch GitLab pipeline details") 368 parser.add_argument( 369 "--project-id", default="176", help="Project ID (default: 176 i.e. mesa/mesa)" 370 ) 371 parser.add_argument("--pipeline-id", required=True, help="Pipeline ID") 372 373 args = parser.parse_args() 374 375 message = asyncio.run(main(args.pipeline_id, args.project_id)) 376 377 print(message) 378