Skip to content

Commit 16ad812

Browse files
Replace get_organization_repositories with GraphQL implementation
1 parent a0b402b commit 16ad812

File tree

1 file changed

+135
-53
lines changed

1 file changed

+135
-53
lines changed

main.py

Lines changed: 135 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,7 @@
140140
Protocol,
141141
)
142142
from enum import Enum
143+
from collections import defaultdict
143144
from difflib import SequenceMatcher
144145
from http.client import HTTPSConnection, HTTPResponse
145146
from dataclasses import dataclass
@@ -757,62 +758,143 @@ def get_team_members(self, org: str, team: Team) -> Iterable[TeamMember]:
757758
team_name=team.name,
758759
)
759760

760-
def get_repository_teams(
761-
self, org: str, repo: str
762-
) -> Iterable[TeamRepositoryAccess]:
763-
teams = self._http_get_json_paginated(f"/repos/{org}/{repo}/teams")
764-
for team in teams:
765-
permissions: Dict[str, bool] = team["permissions"]
766-
yield TeamRepositoryAccess(
767-
team_name=team["name"],
768-
role=RepositoryAccessRole.from_permissions_dict(permissions),
769-
)
761+
def get_organization_repo_to_teams_map(
    self, org: str
) -> "dict[int, list[TeamRepositoryAccess]]":
    """
    Map every repository of `org` to the teams that have access to it.

    A single GraphQL query lists the organization's teams and, nested under
    each team, one page (up to 100 entries) of the repositories the team can
    access together with the team's permission on each. The query is
    repeated with the next cursor until no team reports a further page.

    Returns a plain dict keyed by the repository's `databaseId`; each value
    is the list of `TeamRepositoryAccess` entries collected for that
    repository. Repositories that no team has access to are absent.

    Raises:
        AssertionError: the organization has more teams than fit in one
            page; paginating the team list itself is not implemented.
        ValueError: several teams need another page of repositories but
            report different end cursors. The query has only one `$cursor`
            variable, shared by every team, so that case cannot be handled.
    """
    # The annotation above is quoted so that it is never evaluated when the
    # function is defined. Whitespace inside the query is insignificant to
    # GraphQL, it is indented here purely for readability.
    query = """
    query($org: String!, $cursor: String) {
      organization(login: $org) {
        teams(first: 100) {
          nodes {
            name
            repositories(first: 100, after: $cursor) {
              edges {
                permission
                node {
                  databaseId
                }
              }
              pageInfo {
                hasNextPage
                endCursor
              }
              totalCount
            }
          }
          pageInfo {
            hasNextPage
          }
        }
      }
    }
    """

    repo_to_teams: defaultdict[int, list[TeamRepositoryAccess]] = defaultdict(
        list
    )

    cursor = None
    while True:
        response = self._http_graphql(query, {"org": org, "cursor": cursor})
        teams = response["organization"]["teams"]

        # We assume fewer than 100 teams and skip pagination of the team
        # list. Raise explicitly rather than `assert`, so that the check
        # also runs under `python -O`.
        if teams["pageInfo"]["hasNextPage"]:
            raise AssertionError(
                f"Organization {org!r} has more than 100 teams, "
                "pagination of teams is not implemented."
            )

        # End cursors of all teams that still have repositories to list.
        next_cursors = set()

        for team in teams["nodes"]:
            repositories = team["repositories"]

            for edge in repositories["edges"]:
                repo_to_teams[edge["node"]["databaseId"]].append(
                    TeamRepositoryAccess(
                        team_name=team["name"],
                        role=RepositoryAccessRole(edge["permission"].lower()),
                    )
                )

            page_info = repositories["pageInfo"]
            if page_info["hasNextPage"]:
                next_cursors.add(page_info["endCursor"])

        if not next_cursors:
            return dict(repo_to_teams)

        # All teams share the one `$cursor` variable, so we can only
        # continue if every team that has more pages agrees on where the
        # next page starts.
        if len(next_cursors) > 1:
            raise ValueError(
                "Teams report different repository cursors, cannot paginate "
                f"them with a single cursor: {sorted(next_cursors)!r}"
            )
        (cursor,) = next_cursors
786823

787824
def get_organization_repositories(self, org: str) -> Iterable[Repository]:
    """
    Yield every repository of `org` with its direct user and team access.

    Team access is looked up in the map returned by
    `get_organization_repo_to_teams_map`, which is fetched once up front.
    The repositories and their direct collaborators are then listed with a
    paginated GraphQL query, 100 repositories per request. For every
    repository a `Repository` is yielded whose `user_access` and
    `team_access` are sorted tuples.

    Progress is reported through `print_status_stderr`, once per request,
    and the status is cleared with an empty message after the last page.

    Raises:
        AssertionError: a repository has more direct collaborators than fit
            in one page; paginating collaborators is not implemented.
    """
    # Whitespace inside the query is insignificant to GraphQL, it is
    # indented here purely for readability.
    query = """
    query($org: String!, $cursor: String) {
      organization(login: $org) {
        repositories(first:100, after: $cursor) {
          nodes {
            databaseId
            name
            visibility
            # We query with affiliation=direct to get all users that have explicit
            # access to the repository (i.e. not those who have implicit access
            # through being a member of a group). The default is affiliation=all,
            # which also returns users with implicit access.
            collaborators(affiliation: DIRECT, first: 100) {
              edges {
                node {
                  databaseId
                  login
                }
                permission
              }
              pageInfo {
                hasNextPage
              }
            }
          }
          pageInfo {
            hasNextPage
            endCursor
          }
          totalCount
        }
      }
    }
    """

    repo_to_teams = self.get_organization_repo_to_teams_map(org)

    # The total is unknown until the first response reports `totalCount`.
    num_total = "??"
    num_listed = 0

    cursor = None
    while True:
        # Status goes to stderr through the same helper the rest of the
        # listing code uses, so that it does not end up in stdout.
        print_status_stderr(
            f"[{num_listed} / {num_total}] Listing organization repositories"
        )
        response = self._http_graphql(query, {"org": org, "cursor": cursor})
        repos = response["organization"]["repositories"]
        num_total = repos["totalCount"]

        for repo in repos["nodes"]:
            repo_id = repo["databaseId"]
            collaborators = repo["collaborators"]

            # We assume fewer than 100 direct collaborators on any
            # repository and skip pagination. Raise explicitly rather than
            # `assert`, so that the check also runs under `python -O`.
            if collaborators["pageInfo"]["hasNextPage"]:
                raise AssertionError(
                    f"Repository {repo['name']!r} has more than 100 direct "
                    "collaborators, pagination of collaborators is not "
                    "implemented."
                )

            user_access = tuple(
                sorted(
                    UserRepositoryAccess(
                        user_id=collaborator["node"]["databaseId"],
                        user_name=collaborator["node"]["login"],
                        role=RepositoryAccessRole(
                            collaborator["permission"].lower()
                        ),
                    )
                    for collaborator in collaborators["edges"]
                )
            )
            # Repositories that no team has access to are not in the map.
            team_access = tuple(sorted(repo_to_teams.get(repo_id, [])))

            yield Repository(
                repo_id=repo_id,
                name=repo["name"],
                visibility=RepositoryVisibility(repo["visibility"].lower()),
                user_access=user_access,
                team_access=team_access,
            )
            num_listed += 1

        page_info = repos["pageInfo"]
        if not page_info["hasNextPage"]:
            break
        cursor = page_info["endCursor"]

    print_status_stderr("")
816898

817899

818900
def print_indented(lines: str) -> None:

0 commit comments

Comments
 (0)