|
| 1 | +""" |
| 2 | +Generate the contributors database. |
| 3 | +
|
| 4 | +FIXME: replace `requests` calls with the HTTPie API, when available. |
| 5 | +""" |
| 6 | +import json |
| 7 | +import os |
| 8 | +import re |
| 9 | +import sys |
| 10 | +from copy import deepcopy |
| 11 | +from datetime import datetime |
| 12 | +from pathlib import Path |
| 13 | +from subprocess import check_output |
| 14 | +from time import sleep |
| 15 | +from typing import Any, Dict, Optional, Set |
| 16 | + |
| 17 | +import requests |
| 18 | + |
# Type aliases for the data exchanged between the functions below.
FullNames = Set[str]  # display names of commit authors and co-authors
GitHubLogins = Set[str]  # GitHub logins of issue reporters
# A person record mixes list values ('committed', 'reported') with string
# values ('github', 'twitter'), hence ``Any`` rather than ``str``.
Person = Dict[str, Any]
People = Dict[str, Person]  # person records keyed by display name
UserInfo = Dict[str, Any]  # decoded JSON payload returned by the GitHub API

# Bound ``finditer`` yielding one match per "Co-authored-by:" trailer;
# group 1 is the co-author's name.
CO_AUTHORS = re.compile(r'Co-authored-by: ([^<]+) <').finditer
API_URL = 'https://api.github.com'
REPO = OWNER = 'httpie'
# GitHub repository endpoints are shaped as ``/repos/{owner}/{repo}``.
REPO_URL = f'{API_URL}/repos/{OWNER}/{REPO}'

HERE = Path(__file__).parent
DB_FILE = HERE / 'people.json'

# Template for new person records; copy it before modifying (the lists are mutable).
DEFAULT_PERSON: Person = {'committed': [], 'reported': [], 'github': '', 'twitter': ''}
# Issues carrying one of these labels do not count as a contribution.
SKIPPED_LABELS = {'invalid'}
| 35 | + |
# The token is mandatory. The check is an explicit ``raise`` rather than an
# ``assert`` statement so that it still runs under ``python -O`` (which strips
# asserts); the exception type is kept for backward compatibility.
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
if not GITHUB_TOKEN:
    raise AssertionError('GITHUB_TOKEN envar is missing')
| 38 | + |
| 39 | + |
class FinishedForNow(Exception):
    """Signal that the GitHub rate limit is used up, so fetching must stop for now."""
| 42 | + |
| 43 | + |
def main(previous_release: str, current_release: str) -> int:
    """Update the contributors database with the people involved in a release.

    People involved are the authors (and co-authors) of commits made between
    the two releases, and the reporters of issues closed between them.

    :param previous_release: Git revision of the release N-1.
    :param current_release: Git revision of the release N.
    :return: ``0`` when everything was fetched, ``1`` when fetching failed or
        stopped early. The database is saved in both cases.
    """
    since = release_date(previous_release)
    until = release_date(current_release)

    contributors = load_awesome_people()

    # Start from empty sets: if a lookup below fails, the names would
    # otherwise be unbound and the merge step would die on a NameError.
    committers: FullNames = set()
    reporters: GitHubLogins = set()
    complete = True

    try:
        committers = find_committers(since, until)
        reporters = find_reporters(since, until)
    except Exception as exc:
        # We want to save what we fetched so far. So pass.
        print(' !! ', exc)
        complete = False

    try:
        merge_all_the_people(current_release, contributors, committers, reporters)
        fetch_missing_users_details(contributors)
    except FinishedForNow:
        # We want to save what we fetched so far. So pass.
        print(' !! Committers:', committers)
        print(' !! Reporters:', reporters)
        complete = False

    save_awesome_people(contributors)
    return 0 if complete else 1
| 69 | + |
| 70 | + |
def find_committers(since: str, until: str) -> FullNames:
    """Return the names of the authors and co-authors of the commits made in a period.

    The repository's commits endpoint is read page by page (100 commits
    per page) until a page comes back shorter than the page size.

    :param since: Start of the period, passed as the ``since`` query parameter.
    :param until: End of the period, passed as the ``until`` query parameter.
    """
    endpoint = f'{REPO_URL}/commits'
    page_size = 100
    query = {
        'since': since,
        'until': until,
        'per_page': page_size,
    }
    names: FullNames = set()
    page_number = 0

    while True:
        page_number += 1
        query['page'] = page_number
        commits = fetch(endpoint, params=query)

        for entry in commits:
            details = entry['commit']
            names.add(details['author']['name'])
            debug(' >>> Commit', entry['html_url'])
            # Co-authors are only mentioned in the commit message trailers.
            names.update(match.group(1) for match in CO_AUTHORS(details['message']))

        # A short page is the last one.
        if len(commits) < page_size:
            return names
| 99 | + |
| 100 | + |
def find_reporters(since: str, until: str) -> GitHubLogins:
    """Return the GitHub logins of the authors of the issues closed in a period.

    The issue search endpoint is read page by page (100 issues per page)
    until a page comes back shorter than the page size. Issues carrying a
    label listed in ``SKIPPED_LABELS`` are ignored.

    :param since: Start of the period, used in the ``closed:`` search range.
    :param until: End of the period, used in the ``closed:`` search range.
    """
    endpoint = f'{API_URL}/search/issues'
    page_size = 100
    query = {
        'q': f'repo:{REPO}/{OWNER} is:issue closed:{since}..{until}',
        'per_page': page_size,
    }
    logins: GitHubLogins = set()
    page_number = 0

    while True:
        page_number += 1
        query['page'] = page_number
        issues = fetch(endpoint, params=query)['items']

        for issue in issues:
            # Filter out unwanted labels.
            if not SKIPPED_LABELS.isdisjoint(label['name'] for label in issue['labels']):
                continue
            debug(' >>> Issue', issue['html_url'])
            logins.add(issue['user']['login'])

        # A short page is the last one.
        if len(issues) < page_size:
            return logins
| 127 | + |
| 128 | + |
def merge_all_the_people(release: str, contributors: People, committers: FullNames, reporters: GitHubLogins) -> None:
    """Record in *contributors* who committed to, or reported issues for, *release*.

    Known contributors are matched by name (committers) and by GitHub login
    (reporters). Everyone else is looked up on GitHub and added.

    *contributors* is updated in place. *committers* and *reporters* are
    modified too: entries matched to a contributor are removed from them.

    :param release: Release to add to the ``committed``/``reported`` lists.
    :param contributors: Person records keyed by display name.
    :param committers: Names of the release's commit authors.
    :param reporters: GitHub logins of the release's issue reporters.

    >>> contributors = {'Alice': new_person(github='alice', twitter='alice')}
    >>> merge_all_the_people('2.6.0', contributors, set(), set())
    >>> contributors
    {'Alice': {'committed': [], 'reported': [], 'github': 'alice', 'twitter': 'alice'}}

    >>> contributors = {'Bob': new_person(github='bob', twitter='bob')}
    >>> merge_all_the_people('2.6.0', contributors, {'Bob'}, {'bob'})
    >>> contributors
    {'Bob': {'committed': ['2.6.0'], 'reported': ['2.6.0'], 'github': 'bob', 'twitter': 'bob'}}

    >>> contributors = {'Charlotte': new_person(github='charlotte', twitter='charlotte', committed=['2.5.0'], reported=['2.5.0'])}
    >>> merge_all_the_people('2.6.0', contributors, {'Charlotte'}, {'charlotte'})
    >>> contributors
    {'Charlotte': {'committed': ['2.5.0', '2.6.0'], 'reported': ['2.5.0', '2.6.0'], 'github': 'charlotte', 'twitter': 'charlotte'}}

    """
    # Update known contributors.
    for name, details in contributors.items():
        if name in committers:
            if release not in details['committed']:
                details['committed'].append(release)
            committers.remove(name)
        if details['github'] in reporters:
            if release not in details['reported']:
                details['reported'].append(release)
            reporters.remove(details['github'])

    # Add new committers.
    for name in committers:
        # The lookup yields nothing when no account matches the name; the
        # person is then recorded without GitHub details rather than crashing.
        user_info = user(fullname=name) or {}
        login = user_info.get('login') or ''
        contributors[name] = new_person(
            github=login,
            # GitHub reports a missing Twitter handle as null.
            twitter=user_info.get('twitter_username') or '',
            committed=[release],
        )
        if login and login in reporters:
            contributors[name]['reported'].append(release)
            reporters.remove(login)

    # Add new reporters.
    for github_username in reporters:
        user_info = user(github_username=github_username) or {}
        twitter = user_info.get('twitter_username') or ''
        key = user_info.get('name') or user_info.get('login') or github_username
        known = contributors.get(key)
        if known is not None and known['github'] and known['github'] != github_username:
            # Another account already owns this display name: key by login.
            key = github_username
            known = contributors.get(key)

        if known is None:
            contributors[key] = new_person(
                github=github_username,
                twitter=twitter,
                reported=[release],
            )
            continue

        # Extend the existing record instead of overwriting it, which would
        # drop the releases already listed there.
        if release not in known['reported']:
            known['reported'].append(release)
        known['github'] = known['github'] or github_username
        known['twitter'] = known['twitter'] or twitter
| 178 | + |
| 179 | + |
def release_date(release: str) -> str:
    """Return the author date of the latest commit of *release*, in ISO 8601 format.

    The date is read from ``git log``, so this must run inside the repository.
    """
    command = ['git', 'log', '-1', '--format=%ai', release]
    raw_date = check_output(command, text=True).strip()
    parsed = datetime.strptime(raw_date, '%Y-%m-%d %H:%M:%S %z')
    return parsed.isoformat()
| 183 | + |
| 184 | + |
def load_awesome_people() -> People:
    """Read the contributors database, or return ``{}`` when it is missing or unreadable."""
    try:
        content = DB_FILE.read_text(encoding='utf-8')
        people = json.loads(content)
    except (FileNotFoundError, ValueError):
        # No database yet, or its content cannot be decoded: start afresh.
        people = {}
    return people
| 191 | + |
| 192 | + |
def fetch(url: str, params: Optional[Dict[str, Any]] = None) -> UserInfo:
    """GET *url* from the GitHub API and return the decoded JSON payload.

    The request is attempted up to 5 times. On a rate-limited response
    (403 carrying ``X-RateLimit-Reset``), the next attempt waits for the
    limit to reset, provided that is at most 20 seconds away.

    :param url: Full URL of the API endpoint.
    :param params: Optional query parameters.
    :raises FinishedForNow: When the rate limit resets in more than 20 seconds.
    :raises ValueError: When all the attempts failed.
    """
    headers = {
        'Accept': 'application/vnd.github.v3+json',
        # The token must be sent in the ``Authorization`` header, or the
        # request is treated as unauthenticated.
        'Authorization': f'token {GITHUB_TOKEN}',
    }
    for retry in range(1, 6):
        debug(f'[{retry}/5]', f'{url = }', f'{params = }')
        with requests.get(url, params=params, headers=headers) as req:
            try:
                req.raise_for_status()
            except requests.exceptions.HTTPError as exc:
                reset = exc.response.headers.get('X-RateLimit-Reset')
                if exc.response.status_code == 403 and reset is not None:
                    # 403 Client Error: rate limit exceeded for url: ...
                    # The header is expressed in UTC epoch seconds. A naive
                    # local ``now()`` converts to the right epoch, whereas
                    # ``utcnow().timestamp()`` is off by the local UTC offset.
                    now = int(datetime.now().timestamp())
                    wait = int(reset) - now
                    if wait > 20:
                        raise FinishedForNow() from exc
                    # The reset time may already be behind us, and sleep()
                    # rejects negative values.
                    wait = max(wait, 0)
                    debug(' !', 'Waiting', wait, 'seconds before another try ...')
                    sleep(wait)
                continue
            return req.json()
    # Every attempt failed: report it instead of silently returning None.
    raise ValueError('Rate limit exceeded')
| 216 | + |
| 217 | + |
def new_person(**kwargs: str) -> Person:
    """Build a person record: a copy of ``DEFAULT_PERSON`` overridden by *kwargs*."""
    # Deep copy, so that records never share the default (mutable) lists.
    return {**deepcopy(DEFAULT_PERSON), **kwargs}
| 222 | + |
| 223 | + |
def user(fullname: Optional[str] = '', github_username: Optional[str] = '') -> Optional[UserInfo]:
    """Fetch the GitHub profile of a person.

    When *github_username* is given, the profile is fetched directly.
    Otherwise the user search endpoint is queried with *fullname*, first as
    a ``fullname:`` qualifier then as a ``user:`` one, and the profile of
    the first hit is fetched.

    :param fullname: Display name to search for.
    :param github_username: GitHub login; takes precedence over *fullname*.
    :return: The profile as decoded JSON, or ``None`` when no search
        returned a hit.
    """
    if github_username:
        return fetch(f'{API_URL}/users/{github_username}')

    url = f'{API_URL}/search/users'
    for query in (f'fullname:{fullname}', f'user:{fullname}'):
        params = {
            'q': f'repo:{REPO}/{OWNER} {query}',
            'per_page': 1,
        }
        hits = fetch(url, params=params)['items']
        if hits:
            return fetch(hits[0]['url'])

    # Nobody found: say so explicitly rather than falling off the end.
    return None
| 239 | + |
| 240 | + |
def fetch_missing_users_details(people: People) -> None:
    """Fill in the GitHub login and Twitter handle of the people lacking one.

    Records are updated in place. People whose GitHub profile cannot be
    found are left unchanged.

    :param people: Person records keyed by display name.
    """
    for name, details in people.items():
        if details['github'] and details['twitter']:
            continue
        user_info = user(github_username=details['github'], fullname=name)
        if not user_info:
            # The lookup found no account: nothing to fill in.
            continue
        if not details['github']:
            details['github'] = user_info.get('login') or ''
        if not details['twitter']:
            # GitHub reports a missing Twitter handle as null; keep the
            # empty-string convention of the database.
            details['twitter'] = user_info.get('twitter_username') or ''
| 250 | + |
| 251 | + |
def save_awesome_people(people: People) -> None:
    """Write *people* to the database file as indented JSON with sorted keys."""
    with DB_FILE.open(encoding='utf-8', mode='w') as db:
        json.dump(people, db, sort_keys=True, indent=4)
| 255 | + |
| 256 | + |
def debug(*args: Any) -> None:
    """Print *args*, but only when the ``DEBUG`` environment variable is ``1``."""
    if os.getenv('DEBUG') != '1':
        return
    print(*args)
| 260 | + |
| 261 | + |
if __name__ == '__main__':
    # Check the arguments up front. Relying on the TypeError raised by a bad
    # main() call would also report any TypeError raised deeper in the
    # program as a usage error.
    if len(sys.argv) != 3:
        print(f'''
Fetch contributors to a release.

Usage:
    python {sys.argv[0]} <RELEASE N-1> <RELEASE N>
Example:
    python {sys.argv[0]} 2.4.0 2.5.0

Define the DEBUG=1 environment variable to enable verbose output.
''')
        sys.exit(2)

    try:
        ret = main(*sys.argv[1:])
    except KeyboardInterrupt:
        ret = 255
    sys.exit(ret)
0 commit comments