Skip to content

Commit ca1e9fa

Browse files
committed
ebook: check html syntax
1 parent 36b3fde commit ca1e9fa

File tree

7 files changed

+80
-32
lines changed

7 files changed

+80
-32
lines changed

.github/workflows/check-pr.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ jobs:
2323
persist-credentials: false
2424
fetch-depth: 1 # 0 if you want to push to repo
2525

26-
- name: Touch requirements.txt for pip caching
27-
run: touch requirements.txt
26+
- name: Preparations
27+
run: ln -s python-requirements.txt requirements.txt
2828

2929
- name: Python set up
3030
uses: actions/setup-python@v5

.github/workflows/create-release.yml

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# This manually started workflow builds PDFs and eBooks and creates a new release with these assets.
2-
name: "Create Release"
2+
3+
name: Create Release
34

45
on:
56
workflow_dispatch: # manual triggering
@@ -31,9 +32,6 @@ jobs:
3132
persist-credentials: false
3233
fetch-depth: 1 # 0 if you want to push to repo
3334

34-
- name: Touch requirements.txt for pip caching
35-
run: touch requirements.txt
36-
3735
- name: Cache LaTeX files
3836
uses: actions/cache@v4
3937
with:
@@ -48,6 +46,9 @@ jobs:
4846
hpmor*.xdv
4947
key: tex-cache
5048

49+
- name: Preparations
50+
run: ln -s python-requirements.txt requirements.txt
51+
5152
- name: Python set up
5253
uses: actions/setup-python@v5
5354
with:
@@ -63,21 +64,41 @@ jobs:
6364
- name: Install requirements
6465
run: sh scripts/install_requirements.sh > /dev/null
6566

67+
- name: Download previous hpmor.html
68+
run: |
69+
wget --quiet https://github.com/${{ github.repository }}/releases/download/WorkInProgress/hpmor.html -O hpmor-prev.html
70+
6671
- name: Make PDFs
6772
run: sh scripts/make_pdfs.sh > /dev/null
6873

6974
- name: Make eBooks
7075
run: sh scripts/make_ebooks.sh
7176

77+
- name: Compare to previous hpmor.html
78+
run: |
79+
diff -u -s hpmor-prev.html hpmor.html > hpmor-html-diff.log || :
80+
rm hpmor-prev.html
81+
82+
- name: Upload eBooks as artifact
83+
uses: actions/upload-artifact@v4
84+
with:
85+
name: ebooks
86+
path: |
87+
./hpmor-html-diff.log
88+
./hpmor.epub
89+
./hpmor.html
90+
./hpmor.pdf
91+
retention-days: 14
92+
7293
- name: Publish release
7394
uses: softprops/action-gh-release@v2
7495
with:
7596
tag_name: "${{ inputs.version_number }}"
7697
body: "${{ inputs.version_text }}"
7798
prerelease: false
7899
files: |
79-
./hpmor*.pdf
80-
./hpmor.html
81100
./hpmor.epub
82-
./hpmor.mobi
83101
./hpmor.fb2
102+
./hpmor.html
103+
./hpmor.mobi
104+
./hpmor*.pdf

.github/workflows/make.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,7 @@ jobs:
1919
persist-credentials: false
2020
fetch-depth: 1 # 0 if you want to push to repo
2121

22-
- name: Touch requirements.txt for pip caching
23-
run: touch requirements.txt
24-
25-
- name: Caching LaTeX files
22+
- name: Cache LaTeX files
2623
uses: actions/cache@v4
2724
with:
2825
path: |
@@ -36,6 +33,9 @@ jobs:
3633
hpmor*.xdv
3734
key: tex-cache
3835

36+
- name: Preparations
37+
run: ln -s python-requirements.txt requirements.txt
38+
3939
- name: Python set up
4040
uses: actions/setup-python@v5
4141
with:
@@ -90,10 +90,10 @@ jobs:
9090
with:
9191
name: ebooks
9292
path: |
93-
./hpmor.pdf
94-
./hpmor.html
9593
./hpmor-html-diff.log
9694
./hpmor.epub
95+
./hpmor.html
96+
./hpmor.pdf
9797
retention-days: 14
9898

9999
- name: Upload files to release WorkInProgress
@@ -102,9 +102,9 @@ jobs:
102102
tag_name: WorkInProgress
103103
prerelease: true
104104
files: |
105-
./hpmor*.pdf
105+
./hpmor-html-diff.log
106106
./hpmor.epub
107-
./hpmor.mobi
108107
./hpmor.fb2
109108
./hpmor.html
110-
./hpmor-html-diff.log
109+
./hpmor.mobi
110+
./hpmor*.pdf

Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ ENV TZ=Europe/Berlin
99
# prevent keyboard input requests in apt install
1010
ENV DEBIAN_FRONTEND=noninteractive
1111

12-
# install core packages and other dependencies
12+
# install packages and cleanup afterwards
1313
RUN apt-get update && apt-get dist-upgrade -y && \
14-
apt-get install -y python3 git texlive-xetex texlive-lang-greek texlive-lang-german latexmk texlive-extra-utils pandoc calibre imagemagick ghostscript && \
14+
apt-get install -y python3 python3-lxml git texlive-xetex texlive-lang-greek texlive-lang-german latexmk texlive-extra-utils pandoc calibre imagemagick ghostscript && \
1515
apt-get clean autoclean && apt-get autoremove --yes && rm -rf /var/lib/{apt,dpkg,cache,log}/
1616

1717
# set working directory

python-requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lxml
2+
pytest

scripts/ebook/step_6.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,36 @@
88

99
import os
1010
import re
11+
import sys
1112
from pathlib import Path
1213

14+
from lxml import etree # pip install lxml
15+
1316
os.chdir(Path(__file__).parent.parent.parent)
1417

1518
source_file = Path("tmp/hpmor-epub-5-html-unmod.html")
1619
target_file = Path("hpmor.html")
1720

1821

22+
def check_html(cont: str) -> None:
    """
    Check that the HTML text is well-formed; exit the script on failure.

    The text is parsed with a strict lxml XML parser (error recovery
    disabled). On a syntax error the message is printed to stderr and the
    script terminates with exit status 1.

    Args:
        cont: Complete HTML document as text.

    Raises:
        SystemExit: If the document is not well-formed.
    """
    parser = etree.XMLParser(recover=False)  # Do not auto-fix errors
    try:
        # Parse bytes rather than str: lxml rejects str input that carries
        # an XML encoding declaration with ValueError, which would bypass
        # the XMLSyntaxError handler below.
        etree.fromstring(cont.encode("utf-8"), parser)  # noqa: S320
    except etree.XMLSyntaxError as e:
        # Diagnostics go to stderr so they are not lost when stdout is
        # redirected.
        print("HTML Error:", e, file=sys.stderr)
        sys.exit(1)
31+
32+
1933
def fix_ellipsis(s: str) -> str:
2034
"""
2135
Fix ellipsis spacing for ebooks.
2236
"""
2337
# 1. remove all spaces around ellipsis
2438
s = re.sub(r" *… *", "…", s)
2539
# 2. recreate some spaces
26-
# before punctuation : no space, so governed by 1.
40+
# before punctuation : no space, so covered by 1.
2741
# between words
2842
s = re.sub(r"(?<=[\w])…(?=[\w])", "… ", s)
2943
# after punctuation: add space
@@ -33,7 +47,7 @@ def fix_ellipsis(s: str) -> str:
3347
s = re.sub(r"…(?=<em>)", "… ", s)
3448
# before opening EN-quotes: add space
3549
s = re.sub(r"…(?=[“])", "… ", s)
36-
# before opening DE-quotes: add space
50+
# NO: before opening DE-quotes: add space
3751
# s = re.sub(r"…(?=[„])", "… ", s)
3852
return s
3953

@@ -43,6 +57,8 @@ def fix_ellipsis(s: str) -> str:
4357

4458
with source_file.open(encoding="utf-8", newline="\n") as fh_in:
4559
cont = fh_in.read()
60+
print("checking source html")
61+
check_html(cont)
4662

4763
# remove strange leftovers from tex -> html conversion
4864
cont = re.sub(
@@ -55,7 +71,7 @@ def fix_ellipsis(s: str) -> str:
5571

5672
# stray </div> leftover
5773
cont = re.sub(
58-
r"(https://github.com/rrthomas/hpmor/</a></p>)\s+</div>",
74+
r"(github.com/rrthomas/hpmor/</a></p>)\s+</div>",
5975
r"\1",
6076
cont,
6177
flags=re.DOTALL | re.IGNORECASE,
@@ -89,15 +105,6 @@ def fix_ellipsis(s: str) -> str:
89105
# count=1,
90106
# )
91107

92-
# remove training slashes to satisfy https://validator.w3.org
93-
cont = cont.replace("<br />", "<br>")
94-
cont = cont.replace("<hr />", "<hr>")
95-
cont = re.sub(
96-
r"(<meta [^>]*) />",
97-
r"\1>",
98-
cont,
99-
)
100-
101108
# fix spaces around ellipsis
102109
cont = fix_ellipsis(cont)
103110

@@ -158,5 +165,17 @@ def fix_ellipsis(s: str) -> str:
158165
css = fh_in.read()
159166
cont = cont.replace("</style>\n", css + "\n</style>\n")
160167

168+
print("checking target html")
169+
check_html(cont)
170+
171+
# remove trailing slashes to satisfy https://validator.w3.org
172+
cont = cont.replace("<br />", "<br>")
173+
cont = cont.replace("<hr />", "<hr>")
174+
cont = re.sub(
175+
r"(<meta [^>]*) />",
176+
r"\1>",
177+
cont,
178+
)
179+
161180
with target_file.open(mode="w", encoding="utf-8", newline="\n") as fh_out:
162181
fh_out.write(cont)

scripts/install_requirements.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#!/bin/sh
22

3+
# ensure we are in the hpmor root dir
4+
script_dir=$(dirname $0)
5+
cd $script_dir/..
6+
37
# update apt list
48
sudo apt-get update --fix-missing
59

@@ -10,3 +14,5 @@ sudo apt-get install texlive-xetex texlive-lang-greek latexmk
1014
sudo apt-get install pandoc calibre texlive-extra-utils imagemagick ghostscript
1115
# texlive-extra-utils for latexpand
1216
# imagemagick ghostscript : for pdf title page to image conversion
17+
18+
pip install -r python-requirements.txt

0 commit comments

Comments
 (0)