Skip to content

Commit d72a785

Browse files
committed
Make print page (print.html) links link to anchors on the print page
Give every anchor id on the print page a path id prefix so that it can be located, e.g. bar/foo.md#abc -> #bar-foo-abc. Also insert a dummy div at the start of each original page so that links to that page without an anchor can also be located. Signed-off-by: Hollow Man <[email protected]>
1 parent 2213312 commit d72a785

File tree

3 files changed

+141
-23
lines changed

3 files changed

+141
-23
lines changed

src/renderer/html_handlebars/hbs_renderer.rs

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,19 @@ impl HtmlHandlebars {
6363
print_content
6464
.push_str(r#"<div style="break-before: page; page-break-before: always;"></div>"#);
6565
}
66-
print_content.push_str(&fixed_content);
66+
let path_id = {
67+
let mut base = path.display().to_string();
68+
if base.ends_with(".md") {
69+
base.replace_range(base.len() - 3.., "");
70+
}
71+
&base.replace("/", "-").replace("\\", "-")
72+
};
73+
74+
// We have to build header links in advance so that we can know the ranges
75+
// for the headers in one page.
76+
// Insert a dummy div to make sure that we can locate the specific page.
77+
print_content.push_str(&(format!(r#"<div id="{}"></div>"#, &path_id)));
78+
print_content.push_str(&build_header_links(&fixed_content, Some(path_id)));
6779

6880
// Update the context with data for this file
6981
let ctx_path = path
@@ -181,19 +193,31 @@ impl HtmlHandlebars {
181193
}
182194

183195
/// Post-processing applied to the rendered `print.html`: runs `fix_code_blocks`
/// and then `add_playground_pre` on the rendered HTML. Unlike `post_process`,
/// it does not call `build_header_links`.
#[cfg_attr(feature = "cargo-clippy", allow(clippy::let_and_return))]
184-
fn post_process(
196+
fn post_process_print(
185197
&self,
186198
rendered: String,
187199
playground_config: &Playground,
188200
edition: Option<RustEdition>,
189201
) -> String {
190-
let rendered = build_header_links(&rendered);
191202
let rendered = fix_code_blocks(&rendered);
192203
let rendered = add_playground_pre(&rendered, playground_config, edition);
193204

194205
rendered
195206
}
196207

208+
/// Post-processing for a regular page: builds header links with no path id
/// prefix (`None`), then delegates the remaining steps to `post_process_print`.
#[cfg_attr(feature = "cargo-clippy", allow(clippy::let_and_return))]
209+
fn post_process(
210+
&self,
211+
rendered: String,
212+
playground_config: &Playground,
213+
edition: Option<RustEdition>,
214+
) -> String {
215+
let rendered = build_header_links(&rendered, None);
216+
let rendered = self.post_process_print(rendered, &playground_config, edition);
217+
218+
rendered
219+
}
220+
197221
fn copy_static_files(
198222
&self,
199223
destination: &Path,
@@ -547,7 +571,7 @@ impl Renderer for HtmlHandlebars {
547571
let rendered = handlebars.render("index", &data)?;
548572

549573
let rendered =
550-
self.post_process(rendered, &html_config.playground, ctx.config.rust.edition);
574+
self.post_process_print(rendered, &html_config.playground, ctx.config.rust.edition);
551575

552576
utils::fs::write_file(destination, "print.html", rendered.as_bytes())?;
553577
debug!("Creating print.html ✓");
@@ -746,7 +770,7 @@ fn make_data(
746770

747771
/// Goes through the rendered HTML, making sure all header tags have
748772
/// an anchor respectively so people can link to sections directly.
749-
fn build_header_links(html: &str) -> String {
773+
fn build_header_links(html: &str, path_id: Option<&str>) -> String {
750774
let regex = Regex::new(r"<h(\d)>(.*?)</h\d>").unwrap();
751775
let mut id_counter = HashMap::new();
752776

@@ -756,25 +780,40 @@ fn build_header_links(html: &str) -> String {
756780
.parse()
757781
.expect("Regex should ensure we only ever get numbers here");
758782

759-
insert_link_into_header(level, &caps[2], &mut id_counter)
783+
insert_link_into_header(level, &caps[2], &mut id_counter, path_id)
760784
})
761785
.into_owned()
762786
}
763787

764788
/// Insert a single link into a header, making sure each link gets its own
765789
/// unique ID by appending an auto-incremented number (if necessary).
790+
///
791+
/// For `print.html`, we will add a path id prefix.
766792
fn insert_link_into_header(
767793
level: usize,
768794
content: &str,
769795
id_counter: &mut HashMap<String, usize>,
796+
path_id: Option<&str>,
770797
) -> String {
771798
let raw_id = utils::id_from_content(content);
772799

773800
let id_count = id_counter.entry(raw_id.clone()).or_insert(0);
774801

775802
let id = match *id_count {
776-
0 => raw_id,
777-
other => format!("{}-{}", raw_id, other),
803+
0 => {
804+
if let Some(path_id) = path_id {
805+
format!("{}-{}", path_id, raw_id)
806+
} else {
807+
raw_id
808+
}
809+
}
810+
other => {
811+
if let Some(path_id) = path_id {
812+
format!("{}-{}-{}", path_id, raw_id, other)
813+
} else {
814+
format!("{}-{}", raw_id, other)
815+
}
816+
}
778817
};
779818

780819
*id_count += 1;
@@ -980,7 +1019,7 @@ mod tests {
9801019
];
9811020

9821021
for (src, should_be) in inputs {
983-
let got = build_header_links(src);
1022+
let got = build_header_links(src, None);
9841023
assert_eq!(got, should_be);
9851024
}
9861025
}

src/utils/mod.rs

Lines changed: 90 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
1010

1111
use std::borrow::Cow;
1212
use std::fmt::Write;
13-
use std::path::Path;
13+
use std::path::{Component, Path, PathBuf};
1414

1515
pub use self::string::{
1616
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
@@ -63,30 +63,87 @@ pub fn id_from_content(content: &str) -> String {
6363
normalize_id(trimmed)
6464
}
6565

66+
/// See https://stackoverflow.com/a/68233480
67+
/// Lexically normalize the path by resolving `..` components where possible. Return the path id
68+
/// obtained by replacing the directory separators (`/` and `\`) with hyphens.
69+
///
70+
/// This assumes that `a/b/../c` is `a/c`, which might be different from
71+
/// what the OS would have chosen when `b` is a link. This is OK
72+
/// for broot verb arguments but can't be generally used elsewhere.
73+
///
74+
/// A `..` that has nothing to pop is kept. A given path ending with '/' will
75+
/// end with '-' after normalization. Panics if the normalized path is not valid UTF-8.
76+
pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
77+
let ends_with_slash = path.as_ref().to_str().map_or(false, |s| s.ends_with('/'));
78+
let mut normalized = PathBuf::new();
79+
for component in path.as_ref().components() {
80+
match &component {
81+
Component::ParentDir => {
82+
if !normalized.pop() {
83+
normalized.push(component);
84+
}
85+
}
86+
_ => {
87+
normalized.push(component);
88+
}
89+
}
90+
}
91+
if ends_with_slash {
92+
normalized.push("");
93+
}
94+
normalized
95+
.to_str()
96+
.unwrap()
97+
.replace("\\", "-")
98+
.replace("/", "-")
99+
}
100+
66101
/// Fix links to the correct location.
67102
///
68103
/// This adjusts links, such as turning `.md` extensions to `.html`.
69104
///
70105
/// `path` is the path to the page being rendered relative to the root of the
71106
/// book. This is used for the `print.html` page so that links on the print
72-
/// page go to the original location. Normal page rendering sets `path` to
73-
/// None. Ideally, print page links would link to anchors on the print page,
74-
/// but that is very difficult.
107+
/// page go to the anchors that have a path id prefix. Normal page rendering
108+
/// sets `path` to None.
75109
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
76110
lazy_static! {
77111
static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
78112
static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
113+
static ref HTML_MD_LINK: Regex =
114+
Regex::new(r"(?P<link>.*)\.(html|md)(?P<anchor>#.*)?").unwrap();
79115
}
80116

81117
// Adjust a link destination that is not handled by `fix_a_links` (used for image
// sources): a destination without a scheme is prefixed with the parent directory
// of `path` when a path is given; a destination with a scheme is returned unchanged.
fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
118+
// Don't modify links with schemes like `https`.
119+
if !SCHEME_LINK.is_match(&dest) {
120+
// This is a relative link, adjust it as necessary.
121+
let mut fixed_link = String::new();
122+
if let Some(path) = path {
123+
let base = path
124+
.parent()
125+
.expect("path can't be empty")
126+
.to_str()
127+
.expect("utf-8 paths only");
128+
if !base.is_empty() {
129+
write!(fixed_link, "{}/", base).unwrap();
130+
}
131+
}
132+
fixed_link.push_str(&dest);
133+
return CowStr::from(fixed_link);
134+
}
135+
dest
136+
}
137+
138+
fn fix_a_links<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
82139
if dest.starts_with('#') {
83140
// Fragment-only link.
84141
if let Some(path) = path {
85142
let mut base = path.display().to_string();
86143
if base.ends_with(".md") {
87-
base.replace_range(base.len() - 3.., ".html");
144+
base.replace_range(base.len() - 3.., "");
88145
}
89-
return format!("{}{}", base, dest).into();
146+
return format!("#{}{}", normalize_path_id(base), dest.replace("#", "-")).into();
90147
} else {
91148
return dest;
92149
}
@@ -106,7 +163,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
106163
}
107164
}
108165

109-
if let Some(caps) = MD_LINK.captures(&dest) {
166+
if let Some(caps) = HTML_MD_LINK.captures(&dest) {
110167
fixed_link.push_str(&caps["link"]);
111168
fixed_link.push_str(".html");
112169
if let Some(anchor) = caps.name("anchor") {
@@ -115,6 +172,21 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
115172
} else {
116173
fixed_link.push_str(&dest);
117174
};
175+
176+
let path_id = normalize_path_id(&fixed_link)
177+
.replace(".html", "")
178+
.replace("#", "-");
179+
// Judge if the html link is inside the book.
180+
if !path_id.contains("..") {
181+
if let Some(_) = path {
182+
// In `print.html`, print page links would all link to anchors on the print page.
183+
let mut fixed_anchor_for_print = String::new();
184+
fixed_anchor_for_print.push_str("#");
185+
fixed_anchor_for_print.push_str(&path_id);
186+
return CowStr::from(fixed_anchor_for_print);
187+
}
188+
}
189+
// In normal page rendering, links to anchors on another page.
118190
return CowStr::from(fixed_link);
119191
}
120192
dest
@@ -130,22 +202,29 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
130202
// feel free to add more tags if desired; these are the only ones I
131203
// care about right now.
132204
lazy_static! {
133-
static ref HTML_LINK: Regex =
134-
Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
205+
static ref A_LINK: Regex = Regex::new(r#"(<a [^>]*?href=")([^"]+?)""#).unwrap();
206+
static ref HTML_LINK: Regex = Regex::new(r#"(<img [^>]*?src=")([^"]+?)""#).unwrap();
135207
}
136208

137-
HTML_LINK
209+
let temp_html = HTML_LINK
138210
.replace_all(&html, |caps: &regex::Captures<'_>| {
139211
let fixed = fix(caps[2].into(), path);
140212
format!("{}{}\"", &caps[1], fixed)
141213
})
214+
.into_owned();
215+
216+
A_LINK
217+
.replace_all(&temp_html, |caps: &regex::Captures<'_>| {
218+
let fixed = fix_a_links(caps[2].into(), path);
219+
format!("{}{}\"", &caps[1], fixed)
220+
})
142221
.into_owned()
143222
.into()
144223
}
145224

146225
match event {
147226
Event::Start(Tag::Link(link_type, dest, title)) => {
148-
Event::Start(Tag::Link(link_type, fix(dest, path), title))
227+
Event::Start(Tag::Link(link_type, fix_a_links(dest, path), title))
149228
}
150229
Event::Start(Tag::Image(link_type, dest, title)) => {
151230
Event::Start(Tag::Image(link_type, fix(dest, path), title))

tests/rendered_output.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,11 @@ fn check_correct_relative_links_in_print_page() {
125125
assert_contains_strings(
126126
first.join("print.html"),
127127
&[
128-
r##"<a href="second/../first/nested.html">the first section</a>,"##,
128+
r##"<a href="#first-nested">the first section</a>,"##,
129129
r##"<a href="second/../../std/foo/bar.html">outside</a>"##,
130130
r##"<img src="second/../images/picture.png" alt="Some image" />"##,
131-
r##"<a href="second/nested.html#some-section">fragment link</a>"##,
132-
r##"<a href="second/../first/markdown.html">HTML Link</a>"##,
131+
r##"<a href="#second-nested-some-section">fragment link</a>"##,
132+
r##"<a href="#first-markdown">HTML Link</a>"##,
133133
r##"<img src="second/../images/picture.png" alt="raw html">"##,
134134
],
135135
);

0 commit comments

Comments
 (0)