Skip to content

Commit 7ee78f9

Browse files
authored
feat: Add consistent logging to pyosmeta (#270)
* Remove debug print statements in tests * [wip] Use consistent logging * Use tqdm for progress monitoring * Update changelog * Remove print statements * Fix tqdm dep * logging -> logger * Fix community partnership test * cleanup * redirect logging with tqdm context * fix logging test
1 parent 5802ea1 commit 7ee78f9

15 files changed

+143
-91
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ See [GitHub releases](https://github.com/pyOpenSci/pyosMeta/releases) page for a
66

77
## [Unreleased]
88

9+
* Use a consistent logger for informational/debug outputs. Print statements make it hard to track down which line of code emitted a message, and the `warnings` module suppresses recurring warnings by default.
10+
* Added `tqdm` as a dependency to improve progress monitoring when running data processing scripts (@banesullivan)
11+
912
## [v1.6] - 2025-02-17
1013

1114
## What's Changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ dependencies = [
2929
"python-dotenv",
3030
"requests",
3131
"ruamel-yaml>=0.17.21",
32+
"tqdm",
3233
]
3334
# This is metadata that pip reads to understand what Python versions your package supports
3435
requires-python = ">=3.10"
@@ -42,7 +43,7 @@ dev = [
4243
"pre-commit",
4344
"pytest",
4445
"pytest-cov",
45-
"pytest-mock"
46+
"pytest-mock",
4647
]
4748

4849
[project.urls]

src/pyosmeta/cli/process_reviews.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
from pyosmeta import ProcessIssues
2828
from pyosmeta.github_api import GitHubAPI
29+
from pyosmeta.logging import logger
2930

3031

3132
def main():
@@ -40,15 +41,17 @@ def main():
4041
# Get all issues for approved packages - load as dict
4142
issues = process_review.get_issues()
4243
accepted_reviews, errors = process_review.parse_issues(issues)
43-
for url, error in errors.items():
44-
print(f"Error in review at url: {url}")
45-
print(error)
46-
print("-" * 20)
47-
if len(errors):
44+
if errors:
45+
logger.error("Errors found when parsing reviews (printed to stdout):")
46+
for url, error in errors.items():
47+
print(f"Error in review at url: {url}")
48+
print(error)
49+
print("-" * 20)
4850
raise RuntimeError("Errors in parsing reviews, see printout above")
4951

5052
# Update gh metrics via api for all packages
5153
# Contrib count is only available via rest api
54+
logger.info("Getting GitHub metrics for all packages...")
5255
repo_paths = process_review.get_repo_paths(accepted_reviews)
5356
all_reviews = github_api.get_gh_metrics(repo_paths, accepted_reviews)
5457

src/pyosmeta/cli/update_contributors.py

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,13 @@
1414
from datetime import datetime
1515

1616
from pydantic import ValidationError
17+
from tqdm import tqdm
18+
from tqdm.contrib.logging import logging_redirect_tqdm
1719

1820
from pyosmeta.contributors import ProcessContributors
1921
from pyosmeta.file_io import create_paths, load_pickle, open_yml_file
2022
from pyosmeta.github_api import GitHubAPI
23+
from pyosmeta.logging import logger
2124
from pyosmeta.models import PersonModel
2225

2326

@@ -64,29 +67,28 @@ def main():
6467

6568
# Populate all existing contribs into model objects
6669
all_contribs = {}
67-
for a_contrib in web_contribs:
68-
print(a_contrib["github_username"])
69-
try:
70-
all_contribs[a_contrib["github_username"].lower()] = PersonModel(
71-
**a_contrib
72-
)
73-
except ValidationError as ve:
74-
print(a_contrib["github_username"])
75-
print(ve)
76-
77-
print("Done processing all-contribs")
70+
for a_contrib in tqdm(web_contribs, desc="Processing all-contribs"):
71+
username = a_contrib["github_username"]
72+
tqdm.write(f"Processing {username}")
73+
with logging_redirect_tqdm():
74+
try:
75+
all_contribs[username.lower()] = PersonModel(**a_contrib)
76+
except ValidationError:
77+
logger.error(f"Error processing {username}", exc_info=True)
7878

7979
# Create a list of all contributors across repositories
8080
github_api = GitHubAPI()
8181
process_contribs = ProcessContributors(github_api, json_files)
8282
bot_all_contribs = process_contribs.combine_json_data()
8383

84-
print("Updating contrib types and searching for new users now")
85-
for key, users in bot_all_contribs.items():
84+
for key, users in tqdm(
85+
bot_all_contribs.items(),
86+
desc="Updating contrib types and searching for new users",
87+
):
8688
for gh_user in users:
8789
# Find and populate data for any new contributors
8890
if gh_user not in all_contribs.keys():
89-
print("Missing", gh_user, "Adding them now")
91+
logger.info(f"Missing {gh_user}, adding them now")
9092
new_contrib = process_contribs.return_user_info(gh_user)
9193
new_contrib["date_added"] = datetime.now().strftime("%Y-%m-%d")
9294
all_contribs[gh_user] = PersonModel(**new_contrib)
@@ -95,8 +97,8 @@ def main():
9597
all_contribs[gh_user].add_unique_value("contributor_type", key)
9698

9799
if update_all:
98-
for user in all_contribs.keys():
99-
print("Updating all user info from github", user)
100+
for user in tqdm(all_contribs.keys(), desc="Updating all user info"):
101+
tqdm.write(f"Updating all user info from github for {user}")
100102
new_gh_data = process_contribs.return_user_info(user)
101103

102104
# TODO: turn this into a small update method
@@ -127,7 +129,9 @@ def main():
127129
try:
128130
setattr(data, "date_added", history[user])
129131
except KeyError:
130-
print(f"Username {user} must be new, skipping")
132+
logger.error(
133+
f"Username {user} must be new, skipping", exc_info=True
134+
)
131135

132136
# Export to pickle which supports updates after parsing reviews
133137
with open("all_contribs.pickle", "wb") as f:

src/pyosmeta/cli/update_review_teams.py

Lines changed: 42 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,13 @@
2424
from datetime import datetime
2525

2626
from pydantic import ValidationError
27+
from tqdm import tqdm
28+
from tqdm.contrib.logging import logging_redirect_tqdm
2729

2830
from pyosmeta.contributors import ProcessContributors
2931
from pyosmeta.file_io import clean_export_yml, load_pickle
3032
from pyosmeta.github_api import GitHubAPI
33+
from pyosmeta.logging import logger
3134
from pyosmeta.models import PersonModel, ReviewModel, ReviewUser
3235
from pyosmeta.utils_clean import get_clean_user
3336

@@ -106,13 +109,16 @@ def process_user(
106109
if gh_user not in contribs.keys():
107110
# If they aren't in the existing contribs.yml data, add them by using
108111
# their github username and hitting the github api
109-
print("Found a new contributor!", gh_user)
112+
logger.info(f"Found a new contributor: {gh_user}")
110113
new_contrib = processor.return_user_info(gh_user)
111114
new_contrib["date_added"] = datetime.now().strftime("%Y-%m-%d")
112115
try:
113116
contribs[gh_user] = PersonModel(**new_contrib)
114-
except ValidationError as ve:
115-
print(ve)
117+
except ValidationError:
118+
logger.error(
119+
f"Error processing new contributor {gh_user}. Skipping this user.",
120+
exc_info=True,
121+
)
116122

117123
# Update the user's list of contribution types if there are new types to add
118124
# for instance a new reviewer would have a "Reviewer" contributor type
@@ -143,33 +149,42 @@ def main():
143149

144150
contrib_types = process_contribs.contrib_types
145151

146-
for pkg_name, review in packages.items():
147-
print("Processing review team for:", pkg_name)
148-
for role in contrib_types.keys():
149-
user: list[ReviewUser] | ReviewUser = getattr(review, role)
150-
151-
# Eic is a newer field, so in some instances it will be empty
152-
# if it's empty print a message noting the data are missing
153-
if user:
154-
# Handle lists or single users separately
155-
if isinstance(user, list):
156-
for i, a_user in enumerate(user):
157-
a_user, contribs = process_user(
158-
a_user, role, pkg_name, contribs, process_contribs
152+
for pkg_name, review in tqdm(
153+
packages.items(), desc="Processing review teams"
154+
):
155+
with logging_redirect_tqdm():
156+
tqdm.write(f"Processing review team for: {pkg_name}")
157+
for role in contrib_types.keys():
158+
user: list[ReviewUser] | ReviewUser = getattr(review, role)
159+
160+
# Eic is a newer field, so in some instances it will be empty
161+
# if it's empty log a message noting the data are missing
162+
if user:
163+
# Handle lists or single users separately
164+
if isinstance(user, list):
165+
for i, a_user in enumerate(user):
166+
a_user, contribs = process_user(
167+
a_user,
168+
role,
169+
pkg_name,
170+
contribs,
171+
process_contribs,
172+
)
173+
# Update individual user in reference to issue list
174+
user[i] = a_user
175+
elif isinstance(user, ReviewUser):
176+
user, contribs = process_user(
177+
user, role, pkg_name, contribs, process_contribs
178+
)
179+
setattr(review, role, user)
180+
else:
181+
raise TypeError(
182+
"Keys in the `contrib_types` map must be a `ReviewUser` or `list[ReviewUser]` in the `ReviewModel`"
159183
)
160-
# Update individual user in reference to issue list
161-
user[i] = a_user
162-
elif isinstance(user, ReviewUser):
163-
user, contribs = process_user(
164-
user, role, pkg_name, contribs, process_contribs
165-
)
166-
setattr(review, role, user)
167184
else:
168-
raise TypeError(
169-
"Keys in the `contrib_types` map must be a `ReviewUser` or `list[ReviewUser]` in the `ReviewModel`"
185+
logger.warning(
186+
f"I can't find a username for {role} under {pkg_name}. Moving on."
170187
)
171-
else:
172-
print(f"I can't find a username for {role}. Moving on.")
173188

174189
# Export to yaml
175190
contribs_ls = [model.model_dump() for model in contribs.values()]

src/pyosmeta/contributors.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import requests
66

77
from .github_api import GitHubAPI
8+
from .logging import logger
89

910

1011
@dataclass
@@ -102,8 +103,10 @@ def load_json(self, json_path: str) -> dict:
102103
"""
103104
try:
104105
response = requests.get(json_path)
105-
except Exception as ae:
106-
print(ae)
106+
except Exception:
107+
logger.error(
108+
f"Error loading json file: {json_path}", exc_info=True
109+
)
107110
return json.loads(response.text)
108111

109112
def process_json_file(self, json_file: str) -> Tuple[str, List]:
@@ -150,8 +153,10 @@ def combine_json_data(self) -> dict:
150153
try:
151154
key, users = self.process_json_file(json_file)
152155
combined_data[key] = users
153-
except Exception as e:
154-
print("Oops - can't process", json_file, e)
156+
except Exception:
157+
logger.error(
158+
f"Oops - can't process: {json_file}", exc_info=True
159+
)
155160
return combined_data
156161

157162
def return_user_info(
@@ -269,6 +274,6 @@ def combine_users(self, repoDict: dict, webDict: dict) -> dict:
269274

270275
# If the user is not in the web dict, add them
271276
else:
272-
print("New user found. Adding: ", gh_user)
277+
logger.info(f"New user found. Adding: {gh_user}")
273278
webDict[gh_user] = repoDict[gh_user]
274279
return webDict

src/pyosmeta/file_io.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import ruamel.yaml
66
from ruamel.yaml import YAML
77

8+
from .logging import logger
9+
810

911
def load_pickle(filename):
1012
"""Opens a pickle"""
@@ -84,8 +86,8 @@ def open_yml_file(file_path: str) -> dict:
8486
with urllib.request.urlopen(file_path) as f:
8587
yaml = YAML(typ="safe", pure=True)
8688
return yaml.load(f)
87-
except urllib.error.URLError as url_error:
88-
print("Oops - can find the url", file_path, url_error)
89+
except urllib.error.URLError:
90+
logger.error(f"Oops - can find the url: {file_path}", exc_info=True)
8991

9092

9193
def export_yaml(filename: str, data_list: list):

src/pyosmeta/github_api.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
numbers, stars and more "health & stability" related metrics
1010
"""
1111

12-
import logging
1312
import os
1413
import time
1514
from dataclasses import dataclass
@@ -20,6 +19,8 @@
2019

2120
from pyosmeta.models import ReviewModel
2221

22+
from .logging import logger
23+
2324

2425
@dataclass
2526
class GitHubAPI:
@@ -172,7 +173,7 @@ def _get_response_rest(self, url: str) -> list[dict[str, Any]]:
172173

173174
except requests.HTTPError as exception:
174175
if exception.response.status_code == 401:
175-
logging.error(
176+
logger.error(
176177
"Unauthorized request. Your token may be expired or invalid. Please refresh your token."
177178
)
178179
else:
@@ -237,7 +238,7 @@ def _get_contrib_count_rest(self, url: str) -> int | None:
237238
contributors = self._get_response_rest(repo_contribs_url)
238239

239240
if not contributors:
240-
logging.warning(
241+
logger.warning(
241242
f"Repository not found: {repo_contribs_url}. Did the repo URL change?"
242243
)
243244
return None
@@ -339,19 +340,19 @@ def _get_metrics_graphql(
339340
]["edges"][0]["node"]["committedDate"],
340341
}
341342
elif response.status_code == 404:
342-
logging.warning(
343+
logger.warning(
343344
f"Repository not found: {repo_info['owner']}/{repo_info['repo_name']}. Did the repo URL change?"
344345
)
345346
return None
346347
elif response.status_code == 403:
347-
logging.warning(
348+
logger.warning(
348349
f"Oops! You may have hit an API limit for repository: {repo_info['owner']}/{repo_info['repo_name']}.\n"
349350
f"API Response Text: {response.text}\n"
350351
f"API Response Headers: {response.headers}"
351352
)
352353
return None
353354
else:
354-
logging.warning(
355+
logger.warning(
355356
f"Unexpected HTTP error: {response.status_code} for repository: {repo_info['owner']}/{repo_info['repo_name']}"
356357
)
357358
return None

src/pyosmeta/logging.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import logging
2+
3+
logger = logging.getLogger(__name__)
4+
logger.setLevel(logging.INFO)

src/pyosmeta/models/base.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
field_validator,
1818
)
1919

20+
from pyosmeta.logging import logger
2021
from pyosmeta.models.github import Labels
2122
from pyosmeta.utils_clean import (
2223
check_url,
@@ -58,15 +59,19 @@ def format_url(cls, url: str) -> str:
5859
return url # Returns empty string if url is empty
5960
else:
6061
if url.startswith("http://"):
61-
print(f"{url} 'http://' replacing w 'https://'")
62+
logger.warning(
63+
f"Oops, http protocol for {url}, changing to https"
64+
)
6265
url = url.replace("http://", "https://")
6366
elif not url.startswith("http"):
64-
print("Oops, missing http")
67+
logger.warning(
68+
f"Oops, missing http protocol for {url}, adding it"
69+
)
6570
url = "https://" + url
6671
if check_url(url=url):
6772
return url
6873
else: # pragma: no cover
69-
print(f"Oops, url `{url}` is not valid, removing it")
74+
logger.warning(f"Oops, url `{url}` is not valid, removing it")
7075
return None
7176

7277

0 commit comments

Comments
 (0)