Skip to content

Commit e1b1138

Browse files
authored
Merge pull request #184 from hugovk/speedup-purge
2 parents a58e9b1 + 9877195 commit e1b1138

File tree

2 files changed

+94
-35
lines changed

2 files changed

+94
-35
lines changed

build_docs.py

+93-34
Original file line number | Diff line number | Diff line change
@@ -45,11 +45,10 @@
4545
from typing import Iterable
4646
from urllib.parse import urljoin
4747

48-
import zc.lockfile
4948
import jinja2
50-
import requests
5149
import tomlkit
52-
50+
import urllib3
51+
import zc.lockfile
5352

5453
try:
5554
from os import EX_OK, EX_SOFTWARE as EX_FAILURE
@@ -433,7 +432,8 @@ def build_robots_txt(
433432
www_root: Path,
434433
group,
435434
skip_cache_invalidation,
436-
):
435+
http: urllib3.PoolManager,
436+
) -> None:
437437
"""Disallow crawl of EOL versions in robots.txt."""
438438
if not www_root.exists():
439439
logging.info("Skipping robots.txt generation (www root does not even exist).")
@@ -448,7 +448,7 @@ def build_robots_txt(
448448
robots_file.chmod(0o775)
449449
run(["chgrp", group, robots_file])
450450
if not skip_cache_invalidation:
451-
purge("robots.txt")
451+
purge(http, "robots.txt")
452452

453453

454454
def build_sitemap(
@@ -641,7 +641,7 @@ def full_build(self):
641641
"""
642642
return not self.quick and not self.language.html_only
643643

644-
def run(self) -> bool:
644+
def run(self, http: urllib3.PoolManager) -> bool:
645645
"""Build and publish a Python doc, for a language, and a version."""
646646
start_time = perf_counter()
647647
logging.info("Running.")
@@ -652,7 +652,7 @@ def run(self) -> bool:
652652
if self.should_rebuild():
653653
self.build_venv()
654654
self.build()
655-
self.copy_build_to_webroot()
655+
self.copy_build_to_webroot(http)
656656
self.save_state(build_duration=perf_counter() - start_time)
657657
except Exception as err:
658658
logging.exception("Badly handled exception, human, please help.")
@@ -797,7 +797,7 @@ def build_venv(self):
797797
run([venv_path / "bin" / "python", "-m", "pip", "freeze", "--all"])
798798
self.venv = venv_path
799799

800-
def copy_build_to_webroot(self):
800+
def copy_build_to_webroot(self, http: urllib3.PoolManager) -> None:
801801
"""Copy a given build to the appropriate webroot with appropriate rights."""
802802
logging.info("Publishing start.")
803803
self.www_root.mkdir(parents=True, exist_ok=True)
@@ -909,9 +909,9 @@ def copy_build_to_webroot(self):
909909
prefixes = run(["find", "-L", targets_dir, "-samefile", target]).stdout
910910
prefixes = prefixes.replace(targets_dir + "/", "")
911911
prefixes = [prefix + "/" for prefix in prefixes.split("\n") if prefix]
912-
purge(*prefixes)
912+
purge(http, *prefixes)
913913
for prefix in prefixes:
914-
purge(*[prefix + p for p in changed])
914+
purge(http, *[prefix + p for p in changed])
915915
logging.info("Publishing done")
916916

917917
def should_rebuild(self):
@@ -977,7 +977,15 @@ def save_state(self, build_duration: float):
977977
state_file.write_text(tomlkit.dumps(states), encoding="UTF-8")
978978

979979

980-
def symlink(www_root: Path, language: Language, directory: str, name: str, group: str, skip_cache_invalidation: bool):
980+
def symlink(
981+
www_root: Path,
982+
language: Language,
983+
directory: str,
984+
name: str,
985+
group: str,
986+
skip_cache_invalidation: bool,
987+
http: urllib3.PoolManager,
988+
) -> None:
981989
"""Used by major_symlinks and dev_symlink to maintain symlinks."""
982990
if language.tag == "en": # English is rooted on /, no /en/
983991
path = www_root
@@ -994,12 +1002,17 @@ def symlink(www_root: Path, language: Language, directory: str, name: str, group
9941002
link.symlink_to(directory)
9951003
run(["chown", "-h", ":" + group, str(link)])
9961004
if not skip_cache_invalidation:
997-
purge_path(www_root, link)
1005+
purge_path(http, www_root, link)
9981006

9991007

10001008
def major_symlinks(
1001-
www_root: Path, group, versions: Iterable[Version], languages: Iterable[Language], skip_cache_invalidation: bool
1002-
):
1009+
www_root: Path,
1010+
group: str,
1011+
versions: Iterable[Version],
1012+
languages: Iterable[Language],
1013+
skip_cache_invalidation: bool,
1014+
http: urllib3.PoolManager,
1015+
) -> None:
10031016
"""Maintains the /2/ and /3/ symlinks for each language.
10041017
10051018
Like:
@@ -1009,11 +1022,26 @@ def major_symlinks(
10091022
"""
10101023
current_stable = Version.current_stable(versions).name
10111024
for language in languages:
1012-
symlink(www_root, language, current_stable, "3", group, skip_cache_invalidation)
1013-
symlink(www_root, language, "2.7", "2", group, skip_cache_invalidation)
1025+
symlink(
1026+
www_root,
1027+
language,
1028+
current_stable,
1029+
"3",
1030+
group,
1031+
skip_cache_invalidation,
1032+
http,
1033+
)
1034+
symlink(www_root, language, "2.7", "2", group, skip_cache_invalidation, http)
10141035

10151036

1016-
def dev_symlink(www_root: Path, group, versions, languages, skip_cache_invalidation: bool):
1037+
def dev_symlink(
1038+
www_root: Path,
1039+
group,
1040+
versions,
1041+
languages,
1042+
skip_cache_invalidation: bool,
1043+
http: urllib3.PoolManager,
1044+
) -> None:
10171045
"""Maintains the /dev/ symlinks for each language.
10181046
10191047
Like:
@@ -1023,10 +1051,18 @@ def dev_symlink(www_root: Path, group, versions, languages, skip_cache_invalidat
10231051
"""
10241052
current_dev = Version.current_dev(versions).name
10251053
for language in languages:
1026-
symlink(www_root, language, current_dev, "dev", group, skip_cache_invalidation)
1054+
symlink(
1055+
www_root,
1056+
language,
1057+
current_dev,
1058+
"dev",
1059+
group,
1060+
skip_cache_invalidation,
1061+
http,
1062+
)
10271063

10281064

1029-
def purge(*paths):
1065+
def purge(http: urllib3.PoolManager, *paths: Path | str) -> None:
10301066
"""Remove one or many paths from docs.python.org's CDN.
10311067
10321068
To be used when a file changes, so the CDN fetches the new one.
@@ -1035,20 +1071,22 @@ def purge(*paths):
10351071
for path in paths:
10361072
url = urljoin(base, str(path))
10371073
logging.debug("Purging %s from CDN", url)
1038-
requests.request("PURGE", url, timeout=30)
1074+
http.request("PURGE", url, timeout=30)
10391075

10401076

1041-
def purge_path(www_root: Path, path: Path):
1077+
def purge_path(http: urllib3.PoolManager, www_root: Path, path: Path) -> None:
10421078
"""Recursively remove a path from docs.python.org's CDN.
10431079
10441080
To be used when a directory changes, so the CDN fetches the new one.
10451081
"""
1046-
purge(*[file.relative_to(www_root) for file in path.glob("**/*")])
1047-
purge(path.relative_to(www_root))
1048-
purge(str(path.relative_to(www_root)) + "/")
1082+
purge(http, *[file.relative_to(www_root) for file in path.glob("**/*")])
1083+
purge(http, path.relative_to(www_root))
1084+
purge(http, str(path.relative_to(www_root)) + "/")
10491085

10501086

1051-
def proofread_canonicals(www_root: Path, skip_cache_invalidation: bool) -> None:
1087+
def proofread_canonicals(
1088+
www_root: Path, skip_cache_invalidation: bool, http: urllib3.PoolManager
1089+
) -> None:
10521090
"""In www_root we check that all canonical links point to existing contents.
10531091
10541092
It can happen that a canonical is "broken":
@@ -1070,11 +1108,12 @@ def proofread_canonicals(www_root: Path, skip_cache_invalidation: bool) -> None:
10701108
html = html.replace(canonical.group(0), "")
10711109
file.write_text(html, encoding="UTF-8", errors="surrogateescape")
10721110
if not skip_cache_invalidation:
1073-
purge(str(file).replace("/srv/docs.python.org/", ""))
1111+
purge(http, str(file).replace("/srv/docs.python.org/", ""))
10741112

10751113

1076-
def parse_versions_from_devguide():
1077-
releases = requests.get(
1114+
def parse_versions_from_devguide(http: urllib3.PoolManager) -> list[Version]:
1115+
releases = http.request(
1116+
"GET",
10781117
"https://raw.githubusercontent.com/"
10791118
"python/devguide/main/include/release-cycle.json",
10801119
timeout=30,
@@ -1104,7 +1143,8 @@ def parse_languages_from_config():
11041143

11051144
def build_docs(args) -> bool:
11061145
"""Build all docs (each language and each version)."""
1107-
versions = parse_versions_from_devguide()
1146+
http = urllib3.PoolManager()
1147+
versions = parse_versions_from_devguide(http)
11081148
languages = parse_languages_from_config()
11091149
todo = [
11101150
(version, language)
@@ -1132,19 +1172,38 @@ def build_docs(args) -> bool:
11321172
builder = DocBuilder(
11331173
version, versions, language, languages, cpython_repo, **vars(args)
11341174
)
1135-
all_built_successfully &= builder.run()
1175+
all_built_successfully &= builder.run(http)
11361176
logging.root.handlers[0].setFormatter(
11371177
logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
11381178
)
11391179

11401180
build_sitemap(versions, languages, args.www_root, args.group)
11411181
build_404(args.www_root, args.group)
11421182
build_robots_txt(
1143-
versions, languages, args.www_root, args.group, args.skip_cache_invalidation
1183+
versions,
1184+
languages,
1185+
args.www_root,
1186+
args.group,
1187+
args.skip_cache_invalidation,
1188+
http,
1189+
)
1190+
major_symlinks(
1191+
args.www_root,
1192+
args.group,
1193+
versions,
1194+
languages,
1195+
args.skip_cache_invalidation,
1196+
http,
1197+
)
1198+
dev_symlink(
1199+
args.www_root,
1200+
args.group,
1201+
versions,
1202+
languages,
1203+
args.skip_cache_invalidation,
1204+
http,
11441205
)
1145-
major_symlinks(args.www_root, args.group, versions, languages, args.skip_cache_invalidation)
1146-
dev_symlink(args.www_root, args.group, versions, languages, args.skip_cache_invalidation)
1147-
proofread_canonicals(args.www_root, args.skip_cache_invalidation)
1206+
proofread_canonicals(args.www_root, args.skip_cache_invalidation, http)
11481207

11491208
return all_built_successfully
11501209

requirements.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
jinja2
2-
requests
32
sentry-sdk>=2
43
tomlkit
4+
urllib3>=2
55
zc.lockfile

0 commit comments

Comments
 (0)