@@ -10,7 +10,7 @@ use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
10
10
11
11
use std:: borrow:: Cow ;
12
12
use std:: fmt:: Write ;
13
- use std:: path:: Path ;
13
+ use std:: path:: { Component , Path , PathBuf } ;
14
14
15
15
pub use self :: string:: {
16
16
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
@@ -63,30 +63,87 @@ pub fn id_from_content(content: &str) -> String {
63
63
normalize_id ( trimmed)
64
64
}
65
65
66
/// Builds an id from a path by lexically resolving `..` components and
/// replacing every directory separator (`/` or `\`) with a hyphen.
///
/// Adapted from <https://stackoverflow.com/a/68233480>.
///
/// Resolution is purely lexical: `a/b/../c` becomes `a/c` without touching
/// the file system, so the result may differ from what the OS would resolve
/// when `b` is a symbolic link.
///
/// A `..` is only cancelled against a preceding normal component. Leading
/// `..` components (for example `../../a`) are kept, so callers can still
/// detect paths that point outside of the starting directory.
///
/// A path ending with `/` yields an id ending with `-`.
///
/// Paths that are not valid UTF-8 are converted lossily rather than causing
/// a panic.
pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
    let path = path.as_ref();
    let ends_with_slash = path.to_string_lossy().ends_with('/');

    let mut normalized = PathBuf::new();
    for component in path.components() {
        // `PathBuf::pop` also succeeds on a buffer that is just `..` or `.`
        // (their parent is the empty path), which would silently swallow
        // leading `..` components. Only pop when the previous component is a
        // real directory or file name.
        let cancels_previous = matches!(component, Component::ParentDir)
            && matches!(
                normalized.components().next_back(),
                Some(Component::Normal(_))
            );
        if cancels_previous {
            normalized.pop();
        } else {
            normalized.push(component);
        }
    }

    if ends_with_slash {
        // Pushing an empty component appends a trailing separator, which
        // turns into the trailing hyphen below.
        normalized.push("");
    }

    normalized
        .to_string_lossy()
        .replace(|c: char| c == '/' || c == '\\', "-")
}
100
+
66
101
/// Fix links to the correct location.
67
102
///
68
103
/// This adjusts links, such as turning `.md` extensions to `.html`.
69
104
///
70
105
/// `path` is the path to the page being rendered relative to the root of the
71
106
/// book. This is used for the `print.html` page so that links on the print
72
- /// page go to the original location. Normal page rendering sets `path` to
73
- /// None. Ideally, print page links would link to anchors on the print page,
74
- /// but that is very difficult.
107
+ /// page go to the anchors that have a path id prefix. Normal page rendering
108
+ /// sets `path` to None.
75
109
fn adjust_links < ' a > ( event : Event < ' a > , path : Option < & Path > ) -> Event < ' a > {
76
110
lazy_static ! {
77
111
static ref SCHEME_LINK : Regex = Regex :: new( r"^[a-z][a-z0-9+.-]*:" ) . unwrap( ) ;
78
112
static ref MD_LINK : Regex = Regex :: new( r"(?P<link>.*)\.md(?P<anchor>#.*)?" ) . unwrap( ) ;
113
+ static ref HTML_MD_LINK : Regex =
114
+ Regex :: new( r"(?P<link>.*)\.(html|md)(?P<anchor>#.*)?" ) . unwrap( ) ;
79
115
}
80
116
81
117
fn fix < ' a > ( dest : CowStr < ' a > , path : Option < & Path > ) -> CowStr < ' a > {
118
+ // Don't modify links with schemes like `https`.
119
+ if !SCHEME_LINK . is_match ( & dest) {
120
+ // This is a relative link, adjust it as necessary.
121
+ let mut fixed_link = String :: new ( ) ;
122
+ if let Some ( path) = path {
123
+ let base = path
124
+ . parent ( )
125
+ . expect ( "path can't be empty" )
126
+ . to_str ( )
127
+ . expect ( "utf-8 paths only" ) ;
128
+ if !base. is_empty ( ) {
129
+ write ! ( fixed_link, "{}/" , base) . unwrap ( ) ;
130
+ }
131
+ }
132
+ fixed_link. push_str ( & dest) ;
133
+ return CowStr :: from ( fixed_link) ;
134
+ }
135
+ dest
136
+ }
137
+
138
+ fn fix_a_links < ' a > ( dest : CowStr < ' a > , path : Option < & Path > ) -> CowStr < ' a > {
82
139
if dest. starts_with ( '#' ) {
83
140
// Fragment-only link.
84
141
if let Some ( path) = path {
85
142
let mut base = path. display ( ) . to_string ( ) ;
86
143
if base. ends_with ( ".md" ) {
87
- base. replace_range ( base. len ( ) - 3 .., ".html " ) ;
144
+ base. replace_range ( base. len ( ) - 3 .., "" ) ;
88
145
}
89
- return format ! ( "{}{}" , base, dest) . into ( ) ;
146
+ return format ! ( "# {}{}" , normalize_path_id ( base) , dest. replace ( "#" , "-" ) ) . into ( ) ;
90
147
} else {
91
148
return dest;
92
149
}
@@ -106,7 +163,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
106
163
}
107
164
}
108
165
109
- if let Some ( caps) = MD_LINK . captures ( & dest) {
166
+ if let Some ( caps) = HTML_MD_LINK . captures ( & dest) {
110
167
fixed_link. push_str ( & caps[ "link" ] ) ;
111
168
fixed_link. push_str ( ".html" ) ;
112
169
if let Some ( anchor) = caps. name ( "anchor" ) {
@@ -115,6 +172,21 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
115
172
} else {
116
173
fixed_link. push_str ( & dest) ;
117
174
} ;
175
+
176
+ let path_id = normalize_path_id ( & fixed_link)
177
+ . replace ( ".html" , "" )
178
+ . replace ( "#" , "-" ) ;
179
+ // Judge if the html link is inside the book.
180
+ if !path_id. contains ( ".." ) {
181
+ if let Some ( _) = path {
182
+ // In `print.html`, print page links would all link to anchors on the print page.
183
+ let mut fixed_anchor_for_print = String :: new ( ) ;
184
+ fixed_anchor_for_print. push_str ( "#" ) ;
185
+ fixed_anchor_for_print. push_str ( & path_id) ;
186
+ return CowStr :: from ( fixed_anchor_for_print) ;
187
+ }
188
+ }
189
+ // In normal page rendering, links to anchors on another page.
118
190
return CowStr :: from ( fixed_link) ;
119
191
}
120
192
dest
@@ -130,22 +202,29 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
130
202
// feel free to add more tags if desired; these are the only ones I
131
203
// care about right now.
132
204
lazy_static ! {
133
- static ref HTML_LINK : Regex =
134
- Regex :: new( r#"(<(?:a| img) [^>]*?(?: src|href) =")([^"]+?)""# ) . unwrap( ) ;
205
+ static ref A_LINK : Regex = Regex :: new ( r#"(<a [^>]*?href=")([^"]+?)""# ) . unwrap ( ) ;
206
+ static ref HTML_LINK : Regex = Regex :: new( r#"(<img [^>]*?src=")([^"]+?)""# ) . unwrap( ) ;
135
207
}
136
208
137
- HTML_LINK
209
+ let temp_html = HTML_LINK
138
210
. replace_all ( & html, |caps : & regex:: Captures < ' _ > | {
139
211
let fixed = fix ( caps[ 2 ] . into ( ) , path) ;
140
212
format ! ( "{}{}\" " , & caps[ 1 ] , fixed)
141
213
} )
214
+ . into_owned ( ) ;
215
+
216
+ A_LINK
217
+ . replace_all ( & temp_html, |caps : & regex:: Captures < ' _ > | {
218
+ let fixed = fix_a_links ( caps[ 2 ] . into ( ) , path) ;
219
+ format ! ( "{}{}\" " , & caps[ 1 ] , fixed)
220
+ } )
142
221
. into_owned ( )
143
222
. into ( )
144
223
}
145
224
146
225
match event {
147
226
Event :: Start ( Tag :: Link ( link_type, dest, title) ) => {
148
- Event :: Start ( Tag :: Link ( link_type, fix ( dest, path) , title) )
227
+ Event :: Start ( Tag :: Link ( link_type, fix_a_links ( dest, path) , title) )
149
228
}
150
229
Event :: Start ( Tag :: Image ( link_type, dest, title) ) => {
151
230
Event :: Start ( Tag :: Image ( link_type, fix ( dest, path) , title) )
0 commit comments