Skip to content

Commit 57e0339

Browse files
committed
Drop XMLRPC API to use PyPI Index API
The XMLRPC is now obsolete and deprecated and even the parts that may not be deprecated no longer work. Instead, let's use the simple Index API using JSON to collect all packages and their changes. Reference: pypa#1897 Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent db92238 commit 57e0339

File tree

11 files changed

+66
-89
lines changed

11 files changed

+66
-89
lines changed

CHANGES.md

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
## Bug Fixes
88

99
- Support reading HTTP proxy URLs from environment variables, and SOCKS proxy URLs from the 'mirror.proxy' config option `PR #1861`
10+
- Drop support for the PyPI XML-RPC API and use the new Index API instead to get all packages `ISSUE #1897`
1011

1112
# 6.6.0
1213

README.md

-2
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,6 @@ parts of PyPI that are needed to support package installation. It does not
164164
support more dynamic APIs of PyPI that may be used by various clients for
165165
other purposes.
166166

167-
An example of an unsupported API is [PyPI's XML-RPC interface](https://warehouse.readthedocs.io/api-reference/xml-rpc/), which is used when running `pip search`.
168-
169167
### Bandersnatch Mission
170168

171169
The bandersnatch project strives to:

docs/index.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Bandersnatch documentation
88
bandersnatch is a PyPI mirror client according to `PEP 381`
99
https://www.python.org/dev/peps/pep-0381/.
1010

11-
Bandersnatch hits the XMLRPC API of pypi.org to get all packages with serial
11+
Bandersnatch hits the Index JSON API of pypi.org to get all packages with serial
1212
or packages since the last run's serial. bandersnatch then uses the JSON API
1313
of PyPI to get shasums and release file paths to download and work out where
1414
to lay out the package files on a POSIX file system.

requirements.txt

-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
aiohttp==3.11.12
22
aiohttp-socks==0.10.1
3-
aiohttp-xmlrpc==1.5.0
43
async-timeout==5.0.1
54
attrs==25.1.0
65
chardet==5.2.0

requirements_docs.txt

-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ packaging==24.2
55
requests==2.32.3
66
sphinx==8.2.1
77
MyST-Parser==4.0.1
8-
xmlrpc2==0.3.1
98
sphinx-argparse-cli==1.19.0
109

1110
git+https://github.com/pypa/pypa-docs-theme.git#egg=pypa-docs-theme

setup.cfg

+1-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ version = 6.7.0.dev0
2222
install_requires =
2323
aiohttp
2424
aiohttp-socks
25-
aiohttp-xmlrpc
2625
filelock
2726
humanfriendly
2827
importlib_metadata
@@ -93,5 +92,5 @@ s3 =
9392
[isort]
9493
atomic = true
9594
profile = black
96-
known_third_party = _pytest,aiohttp,aiohttp_socks,aiohttp_xmlrpc,filelock,freezegun,keystoneauth1,mock_config,packaging,pkg_resources,pytest,setuptools,swiftclient
95+
known_third_party = _pytest,aiohttp,aiohttp_socks,filelock,freezegun,keystoneauth1,mock_config,packaging,pkg_resources,pytest,setuptools,swiftclient
9796
known_first_party = bandersnatch,bandersnatch_filter_plugins,bandersnatch_storage_plugins

src/bandersnatch/master.py

+35-51
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from typing import Any
99

1010
import aiohttp
11-
from aiohttp_xmlrpc.client import ServerProxy
1211

1312
import bandersnatch
1413
from bandersnatch.config.proxy import get_aiohttp_proxy_kwargs, proxy_address_from_env
@@ -28,10 +27,6 @@ class StalePage(Exception):
2827
"""We got a page back from PyPI that doesn't meet our expected serial."""
2928

3029

31-
class XmlRpcError(aiohttp.ClientError):
32-
"""Issue getting package listing from PyPI Repository"""
33-
34-
3530
class Master:
3631
def __init__(
3732
self,
@@ -141,58 +136,47 @@ async def url_fetch(
141136
fd.write(chunk)
142137

143138
@property
144-
def xmlrpc_url(self) -> str:
145-
return f"{self.url}/pypi"
146-
147-
# TODO: Potentially make USER_AGENT more accessible from aiohttp-xmlrpc
148-
async def _gen_custom_headers(self) -> dict[str, str]:
149-
# Create dummy client so we can copy the USER_AGENT + prepend bandersnatch info
150-
dummy_client = ServerProxy(self.xmlrpc_url, loop=self.loop)
151-
custom_headers = {
152-
"User-Agent": (
153-
f"bandersnatch {bandersnatch.__version__} {dummy_client.USER_AGENT}"
154-
)
139+
def simple_url(self) -> str:
140+
return f"{self.url}/simple/"
141+
142+
# TODO: Potentially make USER_AGENT more accessible from aiohttp
143+
@property
144+
def _custom_headers(self) -> dict[str, str]:
145+
return {
146+
"User-Agent": f"bandersnatch {bandersnatch.__version__}",
147+
# the simple API uses the Accept header to select a JSON response
148+
"Accept": "application/vnd.pypi.simple.v1+json",
155149
}
156-
await dummy_client.close()
157-
return custom_headers
158-
159-
async def _gen_xmlrpc_client(self) -> ServerProxy:
160-
custom_headers = await self._gen_custom_headers()
161-
client = ServerProxy(
162-
self.xmlrpc_url,
163-
client=self.session,
164-
loop=self.loop,
165-
headers=custom_headers,
166-
)
167-
return client
168150

169-
# TODO: Add an async context manager to aiohttp-xmlrpc to replace this function
170-
async def rpc(self, method_name: str, serial: int = 0) -> Any:
171-
try:
172-
client = await self._gen_xmlrpc_client()
173-
method = getattr(client, method_name)
174-
if serial:
175-
return await method(serial)
176-
return await method()
177-
except TimeoutError as te:
178-
logger.error(f"Call to {method_name} @ {self.xmlrpc_url} timed out: {te}")
151+
async def fetch_simple_index(self) -> Any:
152+
"""Return a mapping of all project data from the PyPI Index API"""
153+
logger.debug(f"Fetching simple JSON index from {self.simple_url}")
154+
async with self.session.get(
155+
self.simple_url, headers=self._custom_headers
156+
) as response:
157+
simple_index = await response.json()
158+
return simple_index
179159

180160
async def all_packages(self) -> Any:
181-
all_packages_with_serial = await self.rpc("list_packages_with_serial")
182-
if not all_packages_with_serial:
183-
raise XmlRpcError("Unable to get full list of packages")
184-
return all_packages_with_serial
161+
"""Return a mapping of all project names as {name: last_serial}"""
162+
simple_index = await self.fetch_simple_index()
163+
if not simple_index:
164+
return {}
165+
all_packages = {
166+
project["name"]: project["_last-serial"]
167+
for project in simple_index["projects"]
168+
}
169+
logger.debug(f"Fetched #{len(all_packages)} from simple JSON index")
170+
return all_packages
185171

186172
async def changed_packages(self, last_serial: int) -> dict[str, int]:
187-
changelog = await self.rpc("changelog_since_serial", last_serial)
188-
if changelog is None:
189-
changelog = []
190-
191-
packages: dict[str, int] = {}
192-
for package, _version, _time, _action, serial in changelog:
193-
if serial > packages.get(package, 0):
194-
packages[package] = serial
195-
return packages
173+
"""Return a mapping of all project names changed since last serial as {name: last_serial}"""
174+
all_packages = await self.all_packages()
175+
changed_packages = {
176+
pkg: ser for pkg, ser in all_packages.items() if ser > last_serial
177+
}
178+
logger.debug(f"Fetched #{len(changed_packages)} changed packages")
179+
return changed_packages
196180

197181
async def get_package_metadata(self, package_name: str, serial: int = 0) -> Any:
198182
try:

src/bandersnatch/mirror.py

-1
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,6 @@ async def process_package(self, package: Package) -> None:
337337
await loop.run_in_executor(
338338
self.storage_backend.executor, self.sync_simple_pages, package
339339
)
340-
# XMLRPC PyPI Endpoint stores raw_name so we need to provide it
341340
await loop.run_in_executor(
342341
self.storage_backend.executor,
343342
self.record_finished_package,

src/bandersnatch/tests/conftest.py

-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,6 @@ def session_side_effect(*args: Any, **kwargs: Any) -> Any:
150150
return FakeAiohttpClient()
151151

152152
master = Master("https://pypi.example.com")
153-
master.rpc = mock.Mock() # type: ignore
154153
master.session = mock.MagicMock()
155154
master.session.get.side_effect = session_side_effect
156155
master.session.request.side_effect = session_side_effect

src/bandersnatch/tests/test_master.py

+26-27
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
import bandersnatch
9-
from bandersnatch.master import Master, StalePage, XmlRpcError
9+
from bandersnatch.master import Master, StalePage
1010

1111

1212
@pytest.mark.asyncio
@@ -16,45 +16,44 @@ async def test_disallow_http() -> None:
1616

1717

1818
@pytest.mark.asyncio
19-
async def test_rpc_url(master: Master) -> None:
20-
assert master.xmlrpc_url == "https://pypi.example.com/pypi"
19+
async def test_self_simple_url(master: Master) -> None:
20+
assert master.simple_url == "https://pypi.example.com/simple/"
2121

2222

2323
@pytest.mark.asyncio
2424
async def test_all_packages(master: Master) -> None:
25-
expected = [["aiohttp", "", "", "", "69"]]
26-
master.rpc = AsyncMock(return_value=expected) # type: ignore
25+
simple_index = {
26+
"meta": {"_last-serial": 22, "api-version": "1.1"},
27+
"projects": [
28+
{"_last-serial": 20, "name": "foobar"},
29+
{"_last-serial": 18, "name": "baz"},
30+
],
31+
}
32+
33+
master.fetch_simple_index = AsyncMock(return_value=simple_index) # type: ignore
2734
packages = await master.all_packages()
28-
assert expected == packages
29-
30-
31-
@pytest.mark.asyncio
32-
async def test_all_packages_raises(master: Master) -> None:
33-
master.rpc = AsyncMock(return_value=[]) # type: ignore
34-
with pytest.raises(XmlRpcError):
35-
await master.all_packages()
35+
assert packages == {"foobar": 20, "baz": 18}
3636

3737

3838
@pytest.mark.asyncio
3939
async def test_changed_packages_no_changes(master: Master) -> None:
40-
master.rpc = AsyncMock(return_value=None) # type: ignore
40+
master.fetch_simple_index = AsyncMock(return_value=None) # type: ignore
4141
changes = await master.changed_packages(4)
4242
assert changes == {}
4343

4444

4545
@pytest.mark.asyncio
4646
async def test_changed_packages_with_changes(master: Master) -> None:
47-
list_of_package_changes = [
48-
("foobar", "1", 0, "added", 17),
49-
("baz", "2", 1, "updated", 18),
50-
("foobar", "1", 0, "changed", 20),
51-
# The server usually just hands out monotonous serials in the
52-
# changelog. This verifies that we don't fail even with garbage input.
53-
("foobar", "1", 0, "changed", 19),
54-
]
55-
master.rpc = AsyncMock(return_value=list_of_package_changes) # type: ignore
47+
simple_index = {
48+
"meta": {"_last-serial": 22, "api-version": "1.1"},
49+
"projects": [
50+
{"_last-serial": 20, "name": "foobar"},
51+
{"_last-serial": 18, "name": "baz"},
52+
],
53+
}
54+
master.fetch_simple_index = AsyncMock(return_value=simple_index) # type: ignore
5655
changes = await master.changed_packages(4)
57-
assert changes == {"baz": 18, "foobar": 20}
56+
assert changes == {"foobar": 20, "baz": 18}
5857

5958

6059
@pytest.mark.asyncio
@@ -79,9 +78,9 @@ async def test_master_url_fetch(master: Master) -> None:
7978

8079

8180
@pytest.mark.asyncio
82-
async def test_xmlrpc_user_agent(master: Master) -> None:
83-
client = await master._gen_xmlrpc_client()
84-
assert f"bandersnatch {bandersnatch.__version__}" in client.headers["User-Agent"]
81+
async def test__simple_index_user_agent(master: Master) -> None:
82+
headers = master._custom_headers
83+
assert f"bandersnatch {bandersnatch.__version__}" in headers["User-Agent"]
8584

8685

8786
@pytest.mark.asyncio

src/test_tools/test_xmlrpc.py src/test_tools/test_simple_index.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python3
22

33
"""
4-
Quick tool to test xmlrpc queries from bandersnatch
4+
Quick tool to test PyPI Index API queries from bandersnatch
55
"""
66

77
import asyncio
@@ -12,7 +12,7 @@
1212
async def main() -> int:
1313
async with Master("https://pypi.org") as master:
1414
all_packages = await master.all_packages()
15-
print(f"PyPI returned {len(all_packages)} PyPI packages via xmlrpc")
15+
print(f"PyPI returned {len(all_packages)} PyPI packages via Index API")
1616
return 0
1717

1818

0 commit comments

Comments
 (0)