Skip to content

Commit 0af1d79

Browse files
committed
rustdoc: word wrap CamelCase in the item list table
This is an alternative to ee6459d. That is, it fixes the issue that affects the very long type names in https://docs.rs/async-stripe/0.31.0/stripe/index.html#structs.

This is, necessarily, a pile of nasty heuristics. We need to balance a few issues:

- Sometimes, there's no real word break. For example, `BTreeMap` should be `BTree<wbr>Map`, not `B<wbr>Tree<wbr>Map`.
- Sometimes, there's a legit word break, but the name is tiny and the HTML overhead isn't worth it. For example, if we're typesetting `TyCtx`, writing `Ty<wbr>Ctx` would have an HTML overhead of 50%. Line breaking inside it makes no sense.
1 parent 2a1c384 commit 0af1d79

9 files changed

+115
-5
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -5013,6 +5013,7 @@ dependencies = [
50135013
"tracing",
50145014
"tracing-subscriber",
50155015
"tracing-tree",
5016+
"unicode-segmentation",
50165017
]
50175018

50185019
[[package]]

src/librustdoc/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ tempfile = "3"
2323
tracing = "0.1"
2424
tracing-tree = "0.3.0"
2525
threadpool = "1.8.1"
26+
unicode-segmentation = "1.9"
2627

2728
[dependencies.tracing-subscriber]
2829
version = "0.3.3"

src/librustdoc/html/escape.rs

+43
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
//! string of text (for use in a format string).
55
66
use std::fmt;
7+
use unicode_segmentation::UnicodeSegmentation;
78

89
/// Wrapper struct which will emit the HTML-escaped version of the contained
910
/// string when passed to a format string.
@@ -74,3 +75,45 @@ impl<'a> fmt::Display for EscapeBodyText<'a> {
7475
Ok(())
7576
}
7677
}
78+
79+
/// Wrapper struct which will emit the HTML-escaped version of the contained
80+
/// string when passed to a format string. This wrapper also inserts `<wbr>`
81+
/// word-break opportunities into CamelCase and snake_case names.
82+
///
83+
/// This is only safe to use for text nodes. If you need your output to be
84+
/// safely contained in an attribute, use [`Escape`]. If you don't know the
85+
/// difference, use [`Escape`].
86+
pub(crate) struct EscapeBodyTextWithWbr<'a>(pub &'a str);
87+
88+
impl<'a> fmt::Display for EscapeBodyTextWithWbr<'a> {
89+
// Writes the text through `EscapeBodyText` one segment at a time, emitting a
// literal `<wbr>` between segments wherever the heuristic below finds a word
// boundary. The `<wbr>` markup itself is written unescaped.
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
90+
let EscapeBodyTextWithWbr(text) = *self;
91+
// Names shorter than 8 bytes are only escaped, never broken.
if text.len() < 8 {
92+
return EscapeBodyText(text).fmt(fmt);
93+
}
94+
// Byte offset where the not-yet-written segment starts.
let mut last = 0;
95+
// Walk grapheme clusters (with their byte offsets) so a break can never land
// inside a cluster, e.g. between a letter and its combining mark. `peekable`
// provides the one-cluster lookahead the heuristic needs.
let mut it = text.grapheme_indices(true).peekable();
96+
let _ = it.next(); // don't insert wbr before first char
97+
while let Some((i, s)) = it.next() {
98+
let pk = it.peek();
99+
// The three checks are closures so that each is evaluated only if the
// short-circuiting condition below actually reaches it.
let is_uppercase = || s.chars().any(|c| c.is_uppercase());
100+
// With no following cluster (end of text) both lookahead checks answer
// `true`, so no break is ever placed directly before the final cluster.
let next_is_uppercase =
101+
|| pk.map_or(true, |(_, t)| t.chars().any(|c| c.is_uppercase()));
102+
let next_is_underscore = || pk.map_or(true, |(_, t)| t.contains('_'));
103+
// Break before the current cluster when either:
// - CamelCase: it is uppercase, the next cluster is not (so it starts a word
//   rather than sitting inside an acronym run), and more than 3 bytes have
//   accumulated since the previous break; or
// - snake_case: it contains `_` and the next cluster does not (only the last
//   underscore of a run qualifies). This rule has no minimum segment length.
if (i - last > 3 && is_uppercase() && !next_is_uppercase())
104+
|| (s.contains('_') && !next_is_underscore())
105+
{
106+
// Flush everything up to (not including) the current cluster, then the
// break; the current cluster becomes the start of the next segment.
EscapeBodyText(&text[last..i]).fmt(fmt)?;
107+
fmt.write_str("<wbr>")?;
108+
last = i;
109+
}
110+
}
111+
// Write whatever follows the last break (the whole text if none was inserted).
if last < text.len() {
112+
EscapeBodyText(&text[last..]).fmt(fmt)?;
113+
}
114+
Ok(())
115+
}
116+
}
117+
118+
#[cfg(test)]
119+
mod tests;

src/librustdoc/html/escape/tests.rs

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Hand-written examples pinning the exact `<wbr>` placement for representative names.
2+
#[test]
3+
fn escape_body_text_with_wbr() {
4+
use super::EscapeBodyTextWithWbr as E;
5+
// extreme corner cases
6+
assert_eq!(&E("").to_string(), "");
7+
assert_eq!(&E("a").to_string(), "a");
8+
assert_eq!(&E("A").to_string(), "A");
9+
// real(istic) examples
10+
assert_eq!(&E("FirstSecond").to_string(), "First<wbr>Second");
11+
assert_eq!(&E("First_Second").to_string(), "First<wbr>_Second");
12+
// `<` and `>` are escaped inside their segments; the inserted `<wbr>` is not.
assert_eq!(&E("First<T>_Second").to_string(), "First&lt;<wbr>T&gt;<wbr>_Second");
13+
assert_eq!(&E("first_second").to_string(), "first<wbr>_second");
14+
// the underscore rule has no minimum length, so the 2-byte `MY` is split off
assert_eq!(&E("MY_CONSTANT").to_string(), "MY<wbr>_CONSTANT");
15+
// every underscore is followed by another underscore or by the end of the text
assert_eq!(&E("___________").to_string(), "___________");
16+
// a string won't get wrapped if it's less than 8 bytes
17+
assert_eq!(&E("HashSet").to_string(), "HashSet");
18+
// a CamelCase break needs more than 3 bytes since the previous break,
// so the 3-byte `Vec` prefix is never split off on its own
19+
assert_eq!(&E("VecDequeue").to_string(), "VecDequeue");
20+
assert_eq!(&E("VecDequeueSet").to_string(), "VecDequeue<wbr>Set");
21+
// how to handle acronyms: break only before the last capital of an uppercase run
22+
assert_eq!(&E("BTreeMap").to_string(), "BTree<wbr>Map");
23+
assert_eq!(&E("HTTPSProxy").to_string(), "HTTPS<wbr>Proxy");
24+
// more corners
// lengths are counted in UTF-8 bytes, so three multi-byte letters already exceed 3 bytes
25+
assert_eq!(&E("ṼẽçÑñéå").to_string(), "Ṽẽç<wbr>Ññéå");
26+
// a letter and its combining mark form one grapheme cluster and are never separated
assert_eq!(&E("V\u{0300}e\u{0300}c\u{0300}D\u{0300}e\u{0300}q\u{0300}u\u{0300}e\u{0300}u\u{0300}e\u{0300}").to_string(), "V\u{0300}e\u{0300}c\u{0300}<wbr>D\u{0300}e\u{0300}q\u{0300}u\u{0300}e\u{0300}u\u{0300}e\u{0300}");
27+
// all caps: every capital is followed by another capital (or the end), so no break
assert_eq!(&E("LPFNACCESSIBLEOBJECTFROMWINDOW").to_string(), "LPFNACCESSIBLEOBJECTFROMWINDOW");
28+
}
29+
// Property test: runs the wrapper over every 8-byte string built from the
// alphabet {`a`, `A`, `_`} and checks structural invariants of the output
// instead of exact break positions.
30+
#[test]
31+
fn escape_body_text_with_wbr_makes_sense() {
32+
use super::EscapeBodyTextWithWbr as E;
33+
use itertools::Itertools as _;
34+
// one lowercase letter, one uppercase letter and the underscore
const C: [u8; 3] = [b'a', b'A', b'_'];
35+
// eight copies of the alphabet: their cartesian product yields all 3^8 strings
for chars in [
36+
C.into_iter(),
37+
C.into_iter(),
38+
C.into_iter(),
39+
C.into_iter(),
40+
C.into_iter(),
41+
C.into_iter(),
42+
C.into_iter(),
43+
C.into_iter(),
44+
]
45+
.into_iter()
46+
.multi_cartesian_product()
47+
{
48+
let s = String::from_utf8(chars).unwrap();
49+
// 8 bytes is the shortest length at which the wrapper attempts any break
assert_eq!(s.len(), 8);
50+
let esc = E(&s).to_string();
51+
// never two breaks in a row
assert!(!esc.contains("<wbr><wbr>"));
52+
// never a break at the very end or the very start
assert!(!esc.ends_with("<wbr>"));
53+
assert!(!esc.starts_with("<wbr>"));
54+
// removing every `<wbr>` must give back the input unchanged
assert_eq!(&esc.replace("<wbr>", ""), &s);
55+
}
56+
}

src/librustdoc/html/format.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ use crate::clean::{
3333
};
3434
use crate::formats::cache::Cache;
3535
use crate::formats::item_type::ItemType;
36-
use crate::html::escape::Escape;
36+
use crate::html::escape::{Escape, EscapeBodyText};
3737
use crate::html::render::Context;
3838
use crate::passes::collect_intra_doc_links::UrlFragment;
3939

@@ -992,6 +992,7 @@ pub(crate) fn anchor<'a, 'cx: 'a>(
992992
f,
993993
r#"<a class="{short_ty}" href="{url}" title="{short_ty} {path}">{text}</a>"#,
994994
path = join_with_double_colon(&fqp),
995+
text = EscapeBodyText(text.as_str()),
995996
)
996997
} else {
997998
f.write_str(text.as_str())

src/librustdoc/html/render/print_item.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use crate::clean;
2626
use crate::config::ModuleSorting;
2727
use crate::formats::item_type::ItemType;
2828
use crate::formats::Impl;
29-
use crate::html::escape::Escape;
29+
use crate::html::escape::{Escape, EscapeBodyTextWithWbr};
3030
use crate::html::format::{
3131
display_fn, join_with_double_colon, print_abi_with_space, print_constness_with_space,
3232
print_where_clause, visibility_print_with_space, Buffer, Ending, PrintWithSpace,
@@ -436,7 +436,7 @@ fn item_module(w: &mut Buffer, cx: &mut Context<'_>, item: &clean::Item, items:
436436
"<div class=\"item-name\"><code>{}extern crate {} as {};",
437437
visibility_print_with_space(myitem, cx),
438438
anchor(myitem.item_id.expect_def_id(), src, cx),
439-
myitem.name.unwrap(),
439+
EscapeBodyTextWithWbr(myitem.name.unwrap().as_str()),
440440
),
441441
None => write!(
442442
w,
@@ -533,7 +533,7 @@ fn item_module(w: &mut Buffer, cx: &mut Context<'_>, item: &clean::Item, items:
533533
{stab_tags}\
534534
</div>\
535535
{docs_before}{docs}{docs_after}",
536-
name = myitem.name.unwrap(),
536+
name = EscapeBodyTextWithWbr(myitem.name.unwrap().as_str()),
537537
visibility_and_hidden = visibility_and_hidden,
538538
stab_tags = extra_info_tags(myitem, item, tcx),
539539
class = myitem.type_(),
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<li><div class="item-name"><a class="struct" href="struct.CreateSubscriptionPaymentSettingsPaymentMethodOptionsCustomerBalanceBankTransferEuBankTransfer.html" title="struct extremely_long_typename::CreateSubscriptionPaymentSettingsPaymentMethodOptionsCustomerBalanceBankTransferEuBankTransfer">Create<wbr />Subscription<wbr />Payment<wbr />Settings<wbr />Payment<wbr />Method<wbr />Options<wbr />Customer<wbr />Balance<wbr />Bank<wbr />Transfer<wbr />EuBank<wbr />Transfer</a></div></li>
+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// ignore-tidy-linelength
2+
// Make sure that, if an extremely long type name is named,
3+
// the item table has it line wrapped.
4+
// There should be some reasonably-placed `<wbr>` tags in the snapshot file.
5+
6+
// @snapshot extremely_long_typename "extremely_long_typename/index.html" '//ul[@class="item-table"]/li'
7+
pub struct CreateSubscriptionPaymentSettingsPaymentMethodOptionsCustomerBalanceBankTransferEuBankTransfer;
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
<ul class="item-table"><li><div class="item-name"><a class="constant" href="constant.MY_CONSTANT.html" title="constant item_desc_list_at_start::MY_CONSTANT">MY_CONSTANT</a></div><div class="desc docblock-short">Groups: <code>SamplePatternSGIS</code>, <code>SamplePatternEXT</code></div></li></ul>
1+
<ul class="item-table"><li><div class="item-name"><a class="constant" href="constant.MY_CONSTANT.html" title="constant item_desc_list_at_start::MY_CONSTANT">MY<wbr />_CONSTANT</a></div><div class="desc docblock-short">Groups: <code>SamplePatternSGIS</code>, <code>SamplePatternEXT</code></div></li></ul>

0 commit comments

Comments
 (0)