#!/usr/bin/env python3
# coding: utf-8

"""
Copyright (c) 2024 Huawei Device Co., Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Description: utils for test suite
"""
import argparse
import json
import os
import stat
import time as times
from datetime import datetime, timedelta, time

import requests
import yaml
from lxml import etree

from result import get_result


def parse_config():
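    """Load crawl settings from config.yaml in the parent directory of this script."""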
    config_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../config.yaml')
    with open(config_file_path, 'r', encoding='utf-8') as config_file:
        configs = yaml.safe_load(config_file)
    return configs


def get_url(name, page):
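    """Build the Gitee URL that lists merged pull requests of repo `name` at the given page."""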
    url_prefix = 'https://gitee.com/openharmony/'
    url_suffix = f'/pulls?assignee_id=&author_id=&label_ids=&label_text=&milestone_id=&page={page}' \
                 f'&priority=&project_type=&scope=&search=&single_label_id=&single_label_text=&' \
                 f'sort=created_at+desc&status=merged&target_project=&tester_id='
    url = url_prefix + name + url_suffix

    return url


def get_html(url):
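    """Fetch the page at `url` and return its HTML text, or an empty string on failure."""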
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                      ' (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    try:
        # A bounded timeout keeps the crawler from hanging indefinitely on a stalled request.
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code == 200:
            return response.text
    except requests.RequestException:
        print("Failed to request the page")
        return ''
    return ''


def write_data(repo_name, data_file, title, committer, commit_time_str, pr_link):
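    """Append one commit record to `data_file` as a single JSON line."""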
    data = {
        'repo_name': repo_name,
        'title': title,
        'committer': committer,
        'commit_time_str': commit_time_str,
        'pr_link': pr_link
    }
    flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND
    mode = stat.S_IWUSR | stat.S_IRUSR
    with os.fdopen(os.open(data_file, flags, mode), 'a', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False)
        file.write('\n')


def get_commit_records(repo_name, commit_start_time, commit_end_time):
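    """Crawl merged pull requests of `repo_name`, record those whose timestamps fall
    between `commit_start_time` and `commit_end_time`, and return the number found.
    """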
    data_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data.txt')
    current_data_count = 0
    page = 1
    is_continue = True
    while is_continue:
        url = get_url(repo_name, str(page))
        html = get_html(url)
        tree = etree.HTML(html)
        commit_list = tree.xpath('/html/body/div[2]/div[2]/div[2]/div[2]/div')
        if not commit_list:
            break
        for commit_task in commit_list:
            title = commit_task.xpath('.//div[1]/a/text()')[0]
            committer = commit_task.xpath('.//div[3]/span[2]/a/span/text()')[0]
            commit_time_str = commit_task.xpath('.//div[3]/span[4]/span/text()')[0].strip()
            pr_link = commit_task.xpath('.//div[1]/a/@href')[0]
            commit_time = datetime.strptime(commit_time_str, '%Y-%m-%d %H:%M')
            if commit_start_time <= commit_time <= commit_end_time:
                current_data_count += 1
                write_data(repo_name, data_file, title, committer, commit_time_str, pr_link)
            if commit_time < commit_start_time:
                is_continue = False
        page += 1

    if current_data_count == 0:
        print(f"No commit records were found for repo {repo_name} within the specified time range")
        failed_message = (f'This repo has no commit record from {commit_start_time}'
                          f' to {commit_end_time.strftime("%Y-%m-%d %H:%M:%S")}')
        write_data(repo_name, data_file, failed_message, None, None, None)

    return current_data_count


def retry_after_crawl_failed(repo_list, commit_start_time, commit_end_time):
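    """Crawl every repo in `repo_list`, retrying on failure; return True on success,
    False once the retry limit is reached.
    """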
    max_retries = 5
    try_in = 2 * 60
    data_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data.txt')
    if os.path.exists(data_file):
        os.remove(data_file)
    for i in range(max_retries):
        try:
            data_count = 0
            for repo_name in repo_list:
                current_data_count = get_commit_records(repo_name, commit_start_time, commit_end_time)
                data_count = data_count + current_data_count
            print(f'The data was successfully obtained, a total of {data_count} commit records were retrieved')
            print(f'Data statistics from {commit_start_time} to {commit_end_time.strftime("%Y-%m-%d %H:%M:%S")}'
                  f' were successfully retrieved')
            return True
        except Exception:
            print(f"Failed to get data, retrying... ({i + 1}/{max_retries})")
            times.sleep(try_in)

    return False


def parse_args():
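    """Parse the optional crawl start time and repository list from the command line."""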
    parser = argparse.ArgumentParser()
    parser.add_argument('--startTime', type=str, dest='start_time', default=None,
                        help='specify crawl start time')
    parser.add_argument('--commitRepo', type=str, dest='commit_repo', default=None,
                        nargs='+',
                        help='get commit messages from these repos')
    return parser.parse_args()


def clean_log():
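    """Remove a commit_log.html report left over from a previous run, if any."""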
    commit_log_html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                        'commit_log.html')
    if os.path.exists(commit_log_html_path):
        os.remove(commit_log_html_path)


def run():
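    """Crawl commit records for the configured time range and generate the result report."""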
    clean_log()
    repo_list_configs = parse_config()
    end_time = datetime.now()
    yesterday = end_time - timedelta(days=1)
    start_time = datetime(yesterday.year, yesterday.month, yesterday.day, 0, 0, 0)
    repo_list = repo_list_configs.get('repo_list')

    arguments = parse_args()
    commit_start_time = repo_list_configs.get('commit_start_time') if arguments.start_time is None \
        else arguments.start_time
    if commit_start_time is not None:
        start_date = datetime.strptime(commit_start_time, '%Y-%m-%d')
        start_time = datetime.combine(start_date, time.min)
        end_time = start_time + timedelta(days=1)

    success = retry_after_crawl_failed(repo_list, start_time, end_time)
    if not success:
        print("Maximum retries reached, failed to crawl the data")
    else:
        get_result()


if __name__ == '__main__':
    run()