Skip to content

Commit d4e6c8c

Browse files
committed
Add on_demand content to repair_metadata
1 parent b090afa commit d4e6c8c

File tree

3 files changed

+169
-3
lines changed

3 files changed

+169
-3
lines changed

pulp_python/app/tasks/repair.py

+46-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
from pulpcore.plugin.util import get_domain
88

99
from pulp_python.app.models import PythonPackageContent, PythonRepository
10-
from pulp_python.app.utils import artifact_to_python_content_data
10+
from pulp_python.app.utils import (
11+
artifact_to_python_content_data,
12+
remote_artifact_to_python_content_data,
13+
)
1114

1215
log = logging.getLogger(__name__)
1316

@@ -49,22 +52,29 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
4952
"""
5053
# TODO: Add on_demand content repair
5154
immediate_content = content.filter(contentartifact__artifact__isnull=False)
55+
on_demand_content = content.filter(contentartifact__remoteartifact__isnull=False)
56+
# todo: distinct() to avoid duplication?
5257
domain = get_domain()
5358

5459
batch = []
5560
set_of_update_fields = set()
5661
total_repaired = 0
62+
processed_pks = set()
5763

5864
progress_report = ProgressReport(
5965
message="Repairing packages' metadata",
6066
code="repair.metadata",
61-
total=immediate_content.count(),
67+
total=immediate_content.count() + on_demand_content.count(),
6268
)
6369
progress_report.save()
6470
with progress_report:
6571
for package in progress_report.iter(
6672
immediate_content.prefetch_related("_artifacts").iterator(chunk_size=1000)
6773
):
74+
if package.pk in processed_pks:
75+
continue
76+
processed_pks.add(package.pk)
77+
6878
new_data = artifact_to_python_content_data(
6979
package.filename, package._artifacts.get(), domain
7080
)
@@ -82,6 +92,40 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
8292
batch = []
8393
set_of_update_fields.clear()
8494

95+
for package in progress_report.iter(
96+
on_demand_content.prefetch_related(
97+
"contentartifact_set__remoteartifact_set"
98+
).iterator(chunk_size=1000)
99+
):
100+
if package.pk in processed_pks:
101+
continue
102+
processed_pks.add(package.pk)
103+
104+
# todo
105+
for content_artifact in package.contentartifact_set.all():
106+
for ra in content_artifact.remoteartifact_set.all():
107+
remote_artifact = ra
108+
break
109+
if remote_artifact:
110+
break
111+
112+
new_data = remote_artifact_to_python_content_data(
113+
package.filename, remote_artifact, domain
114+
)
115+
changed = False
116+
for field, value in new_data.items():
117+
if getattr(package, field) != value:
118+
setattr(package, field, value)
119+
set_of_update_fields.add(field)
120+
changed = True
121+
if changed:
122+
batch.append(package)
123+
if len(batch) == 1000:
124+
total_repaired += len(batch)
125+
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
126+
batch = []
127+
set_of_update_fields.clear()
128+
85129
if batch:
86130
total_repaired += len(batch)
87131
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)

pulp_python/app/utils.py

+60-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import pkginfo
22
import re
3+
import requests
34
import shutil
45
import tempfile
56
import json
67
from collections import defaultdict
78
from django.conf import settings
89
from jinja2 import Template
9-
from packaging.utils import canonicalize_name
10+
from packaging.utils import canonicalize_name, parse_sdist_filename, parse_wheel_filename
1011
from packaging.requirements import Requirement
1112
from packaging.version import parse, InvalidVersion
1213

@@ -189,6 +190,64 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
189190
return data
190191

191192

193+
def get_project_metadata_from_pypi_json(
    package_name,
    version,
    *,
    base_url="https://fixtures.pulpproject.org/python-pypi/",
    timeout=10,
):
    """
    Fetch the metadata of one release of a package from a PyPI-style JSON API.

    The request is sent to ``<base_url>/pypi/<package_name>/<version>/json``, e.g.
    ``https://pypi.org/pypi/scipy/1.1.0/json`` when ``base_url`` is ``https://pypi.org/``.

    Args:
        package_name: Name of the project to look up.
        version: Release to look up; it is formatted into the URL with ``str()``.
        base_url: Root URL of the index, with or without a trailing slash.
            Defaults to the Pulp fixtures server.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The ``"info"`` member of the decoded JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the decoded response has no ``"info"`` member.
    """
    # TODO: take the index URL from the remote instead of defaulting to the fixtures server.
    url = f"{base_url.rstrip('/')}/pypi/{package_name}/{version}/json"
    response = requests.get(url, timeout=timeout)
    # TODO: raise a dedicated error if the version does not exist; for now any
    # error status is surfaced as requests.HTTPError.
    response.raise_for_status()
    return response.json()["info"]
209+
210+
211+
def get_packagetype_and_python_version(filename):
    """
    Derive the package type and python version of a distribution from its file name.

    The file name is matched against the extensions in DIST_EXTENSIONS; the first
    extension it ends with decides the package type.

    Args:
        filename: File name of the distribution.

    Returns:
        A ``(packagetype, python_version)`` tuple. For an "sdist" the python version is
        "source"; otherwise it is the ``pyver`` group of the matching DIST_REGEXES
        pattern, or an empty string when the pattern does not match or the group is empty.

    Raises:
        ValueError: If the file name ends with none of the known extensions.
    """
    known_extensions = list(DIST_EXTENSIONS)
    # endswith() per extension, because splitting on the last dot would not
    # recognise compound extensions.
    matches = [filename.endswith(extension) for extension in known_extensions]
    # list.index raises ValueError when no extension matched.
    extension = known_extensions[matches.index(True)]
    packagetype = DIST_EXTENSIONS[extension]

    if packagetype == "sdist":
        return packagetype, "source"

    bdist_name = DIST_REGEXES[extension].match(filename)
    python_version = (bdist_name.group("pyver") or "") if bdist_name else ""
    return packagetype, python_version
226+
227+
228+
def remote_artifact_to_python_content_data(filename, remote_artifact, domain=None):
    """
    Build the content data of a package for which only a remote artifact is available.

    The project name and version are parsed from the last path segment of the remote
    artifact's URL and used to look the metadata up with
    get_project_metadata_from_pypi_json. The package type and python version are derived
    from ``filename``.

    Args:
        filename: File name of the package content.
        remote_artifact: Object providing ``url`` and, when ``domain`` is not given,
            ``pulp_domain``.
        domain: Domain to store in the data; falls back to ``remote_artifact.pulp_domain``.

    Returns:
        The dict produced by parse_project_metadata, extended with "filename",
        "pulp_domain" and "_pulp_domain".

    Raises:
        ValueError: If the file name in the URL is neither a wheel nor a
            ``.tar.gz``/``.zip`` sdist.
    """
    ra_filename = remote_artifact.url.rsplit("/", 1)[-1]
    # TODO: handle the remaining distribution formats.
    if ra_filename.endswith(".whl"):
        name, version, *_ = parse_wheel_filename(ra_filename)
    elif ra_filename.endswith((".tar.gz", ".zip")):
        name, version = parse_sdist_filename(ra_filename)
    else:
        # Without this branch `name` and `version` stay unbound and the lookup
        # below fails with an unhelpful UnboundLocalError.
        raise ValueError(
            f"Cannot determine the project name and version from '{ra_filename}'"
        )

    metadata = get_project_metadata_from_pypi_json(name, version)

    # TODO: rewrite
    packagetype, python_version = get_packagetype_and_python_version(filename)
    metadata["packagetype"] = packagetype
    metadata["python_version"] = python_version

    data = parse_project_metadata(metadata)
    # TODO: data["sha256"] = remote_artifact.sha256
    data["filename"] = filename
    data["pulp_domain"] = domain or remote_artifact.pulp_domain
    data["_pulp_domain"] = data["pulp_domain"]
    return data
249+
250+
192251
def python_content_to_json(base_path, content_query, version=None, domain=None):
193252
"""
194253
Converts a QuerySet of PythonPackageContent into the PyPi JSON format

pulp_python/tests/functional/api/test_repair.py

+63
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,69 @@ def _create(artifact_filename, filename, content_data):
3232
return _create
3333

3434

35+
@pytest.fixture
def create_content_remote(python_bindings):
    """
    Provide a factory that creates on-demand package content.

    The factory runs a ``pulpcore-manager shell`` script that saves a
    PythonPackageContent, a ContentArtifact without an artifact and a RemoteArtifact
    pointing at the given URL, and returns the content read back through the API.
    """

    def _create(filename, r_artifact_url, content_data, remote):
        # The script prints the href of the new content as its only output.
        commands = (
            "from pulpcore.plugin.models import ContentArtifact, RemoteArtifact; "
            "from pulpcore.plugin.util import extract_pk, get_url; "
            "from pulp_python.app.models import PythonPackageContent, PythonRemote; "
            f"c = PythonPackageContent(filename={filename!r}, **{content_data!r}); "
            "c.save(); "
            f"ca = ContentArtifact(artifact=None, content=c, relative_path={filename!r}); "
            "ca.save(); "
            f"remote_obj = PythonRemote.objects.get(pk=extract_pk({remote.prn!r})); "
            f"ra = RemoteArtifact(content_artifact=ca, remote=remote_obj, url={r_artifact_url!r}); "
            "ra.save(); "
            "print(get_url(c))"
        )
        process = subprocess.run(
            ["pulpcore-manager", "shell", "-c", commands], capture_output=True
        )

        # Report the captured stderr on failure; a bare return-code assert hides the cause.
        assert process.returncode == 0, process.stderr.decode()
        content_href = process.stdout.decode().strip()
        return python_bindings.ContentPackagesApi.read(content_href)

    return _create
60+
61+
62+
def test_metadata_repair_endpoint_on_demand(
    create_content_remote,
    monitor_task,
    move_to_repository,
    python_bindings,
    python_remote_factory,
    python_repo_factory,
):
    """
    Test that the repair_metadata endpoint corrects the metadata of on-demand content.
    """
    sdist_filename = "scipy-1.1.0.tar.gz"
    packages_url = urljoin(PYTHON_FIXTURES_URL, "packages/")
    sdist_url = urljoin(packages_url, sdist_filename)
    # Apart from the name, every value below is deliberately wrong.
    wrong_metadata = dict(
        name="scipy",
        author="ME",
        packagetype="bdist",
        requires_python=">=3.8",
        version="0.2",
    )
    remote = python_remote_factory(includes=["scipy"])
    repo = python_repo_factory(remote=remote)

    content = create_content_remote(sdist_filename, sdist_url, wrong_metadata, remote)
    move_to_repository(repo.pulp_href, [content.pulp_href])

    repair_response = python_bindings.RepositoriesPythonApi.repair_metadata(repo.pulp_href)
    monitor_task(repair_response.task)

    # Read the content again and compare it with the values expected after the repair.
    repaired = python_bindings.ContentPackagesApi.read(content.pulp_href)
    expected = {
        "author": "",
        "packagetype": "sdist",
        "requires_python": ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*",
        "version": "1.1.0",
    }
    for field, value in expected.items():
        assert getattr(repaired, field) == value
96+
97+
3598
@pytest.fixture
3699
def move_to_repository(python_bindings, monitor_task):
37100
def _move(repo_href, content_hrefs):

0 commit comments

Comments
 (0)