Skip to content

Commit abadad4

Browse files
authored
Merge pull request #19402 from ianprime0509/markdown-autolinks
Autodoc: hyperlink URLs in text
2 parents 32b4d85 + ad34ed5 commit abadad4

File tree

4 files changed

+221
-1
lines changed

4 files changed

+221
-1
lines changed

lib/docs/wasm/markdown.zig

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,17 @@
7575
//! content. `target` may contain `\`-escaped characters and balanced
7676
//! parentheses.
7777
//!
78+
//! - **Autolink** - an abbreviated link, of the format `<target>`, where
79+
//! `target` serves as both the link target and text. `target` may not
80+
//! contain spaces or `<`, and any `\` in it are interpreted literally (not as
81+
//! escapes). `target` is expected to be an absolute URI: an autolink will not
82+
//! be recognized unless `target` starts with a URI scheme followed by a `:`.
83+
//!
84+
//! For convenience, autolinks may also be recognized in plain text without
85+
//! any `<>` delimiters. Such autolinks are restricted to start with `http://`
86+
//! or `https://` followed by at least one other character, not including any
87+
//! trailing punctuation after the link.
88+
//!
7889
//! - **Image** - a link directly preceded by a `!`. The link text is
7990
//! interpreted as the alt text of the image.
8091
//!
@@ -710,6 +721,50 @@ test "links" {
710721
);
711722
}
712723

724+
test "autolinks" {
    // Markdown input: `<>`-delimited autolinks, plus angle brackets that must
    // remain ordinary text.
    const source =
        \\<https://example.com>
        \\**This is important: <https://example.com/strong>**
        \\<https://example.com?query=abc.123#page(parens)>
        \\<placeholder>
        \\<data:>
        \\1 < 2
        \\4 > 3
        \\Unclosed: <
        \\
    ;
    // HTML the renderer is expected to produce for `source`.
    const expected_html =
        \\<p><a href="https://example.com">https://example.com</a>
        \\<strong>This is important: <a href="https://example.com/strong">https://example.com/strong</a></strong>
        \\<a href="https://example.com?query=abc.123#page(parens)">https://example.com?query=abc.123#page(parens)</a>
        \\&lt;placeholder&gt;
        \\<a href="data:">data:</a>
        \\1 &lt; 2
        \\4 &gt; 3
        \\Unclosed: &lt;</p>
        \\
    ;
    try testRender(source, expected_html);
}
747+
748+
test "text autolinks" {
    // Markdown input: bare `http://` / `https://` URLs in running text, along
    // with cases that must not be turned into links.
    const source =
        \\Text autolinks must start with http:// or https://.
        \\This doesn't count: ftp://example.com.
        \\Example: https://ziglang.org.
        \\Here is an important link: **http://example.com**
        \\(Links may be in parentheses: https://example.com/?q=(parens))
        \\Escaping a link so it's plain text: https\://example.com
        \\
    ;
    // HTML the renderer is expected to produce for `source`.
    const expected_html =
        \\<p>Text autolinks must start with http:// or https://.
        \\This doesn't count: ftp://example.com.
        \\Example: <a href="https://ziglang.org">https://ziglang.org</a>.
        \\Here is an important link: <strong><a href="http://example.com">http://example.com</a></strong>
        \\(Links may be in parentheses: <a href="https://example.com/?q=(parens)">https://example.com/?q=(parens)</a>)
        \\Escaping a link so it's plain text: https://example.com</p>
        \\
    ;
    try testRender(source, expected_html);
}
767+
713768
test "images" {
714769
try testRender(
715770
\\![Alt text](https://example.com/image.png)

lib/docs/wasm/markdown/Document.zig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ pub const Node = struct {
5151
// Inlines
5252
/// Data is `link`.
5353
link,
54+
/// Data is `text`.
55+
autolink,
5456
/// Data is `link`.
5557
image,
5658
/// Data is `container`.

lib/docs/wasm/markdown/Parser.zig

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -985,8 +985,12 @@ const InlineParser = struct {
985985
ip.pos += 1;
986986
},
987987
']' => try ip.parseLink(),
988+
'<' => try ip.parseAutolink(),
988989
'*', '_' => try ip.parseEmphasis(),
989990
'`' => try ip.parseCodeSpan(),
991+
'h' => if (ip.pos == 0 or isPreTextAutolink(ip.content[ip.pos - 1])) {
992+
try ip.parseTextAutolink();
993+
},
990994
else => {},
991995
}
992996
}
@@ -1076,6 +1080,161 @@ const InlineParser = struct {
10761080
return @enumFromInt(string_top);
10771081
}
10781082

1083+
/// Attempts to parse a `<scheme:rest>` autolink whose opening `<` is at
/// `ip.pos`. On success an `autolink` node holding the text between the angle
/// brackets is appended to the completed inlines and `ip.pos` is left on the
/// closing `>`. If the text does not form an autolink, nothing is recorded and
/// `ip.pos` stays on the opening `<`.
fn parseAutolink(ip: *InlineParser) !void {
    const open = ip.pos;
    const text = ip.content;
    var cursor = open + 1;

    // The scheme must begin with an ASCII letter.
    if (cursor >= text.len) return;
    switch (text[cursor]) {
        'A'...'Z', 'a'...'z' => {},
        else => return,
    }
    cursor += 1;

    // The remainder of the scheme runs up to the terminating `:`.
    while (true) : (cursor += 1) {
        if (cursor >= text.len) return;
        switch (text[cursor]) {
            'A'...'Z', 'a'...'z', '0'...'9', '+', '.', '-' => {},
            ':' => break,
            else => return,
        }
    }
    cursor += 1; // Step past the `:`.

    // Scan the rest of the target for the closing `>`.
    while (cursor < text.len) : (cursor += 1) {
        switch (text[cursor]) {
            // These characters may not appear inside an autolink.
            '<', ' ', '\t', '\n' => return,
            '>' => {
                ip.pos = cursor;
                // The target is taken verbatim: backslashes are not escapes here.
                const link_text = try ip.parent.addString(text[open + 1 .. cursor]);
                const link_node = try ip.parent.addNode(.{
                    .tag = .autolink,
                    .data = .{ .text = .{
                        .content = link_text,
                    } },
                });
                try ip.completed_inlines.append(ip.parent.allocator, .{
                    .node = link_node,
                    .start = open,
                    .len = cursor - open + 1,
                });
                return;
            },
            else => {},
        }
    }
    // Ran off the end without a closing `>`: not an autolink.
}
1128+
1129+
/// Attempts to parse a bare (not `<>`-delimited) autolink beginning at
/// `ip.pos`, which is expected to be the `h` of `http://` or `https://`. On
/// success an `autolink` node is appended to the completed inlines and
/// `ip.pos` is left on the final character of the link. If no valid link is
/// found, nothing is recorded and `ip.pos` is unchanged.
fn parseTextAutolink(ip: *InlineParser) !void {
    const link_start = ip.pos;
    const text = ip.content;
    var cursor = link_start;

    // Required prefix: "http", an optional "s", then "://".
    for ("http") |expected| {
        if (cursor >= text.len or text[cursor] != expected) return;
        cursor += 1;
    }
    if (cursor < text.len and text[cursor] == 's') cursor += 1;
    for ("://") |expected| {
        if (cursor >= text.len or text[cursor] != expected) return;
        cursor += 1;
    }

    // Consume link content up to whitespace or a `)` with no matching `(`
    // inside the link.
    const content_start = cursor;
    var open_parens: usize = 0;
    while (cursor < text.len) : (cursor += 1) {
        switch (text[cursor]) {
            ' ', '\t', '\n' => break,
            '(' => open_parens += 1,
            ')' => {
                if (open_parens == 0) break;
                open_parens -= 1;
            },
            else => {},
        }
    }

    // Trailing punctuation belongs to the surrounding text, not the link.
    var link_end = cursor;
    while (link_end > content_start and isPostTextAutolink(text[link_end - 1])) {
        link_end -= 1;
    }
    // At least one character must follow the `://`.
    if (link_end == content_start) return;

    ip.pos = link_end;
    const link_text = try ip.parent.addString(text[link_start..link_end]);
    const link_node = try ip.parent.addNode(.{
        .tag = .autolink,
        .data = .{ .text = .{
            .content = link_text,
        } },
    });
    try ip.completed_inlines.append(ip.parent.allocator, .{
        .node = link_node,
        .start = link_start,
        .len = link_end - link_start,
    });
    // Leave the position on the last character of the link.
    ip.pos -= 1;
}
1220+
1221+
/// Reports whether a text autolink may be recognized directly after `c`.
/// The accepted characters are space, tab, newline, `*`, `_` and `(`.
fn isPreTextAutolink(c: u8) bool {
    for (" \t\n*_(") |allowed| {
        if (c == allowed) return true;
    }
    return false;
}
1228+
1229+
/// Reports whether `c` is trailing punctuation that, when it directly follows
/// a text autolink, is left out of the link. The characters are `?`, `!`, `.`,
/// `,`, `:`, `*` and `_`.
fn isPostTextAutolink(c: u8) bool {
    return c == '?' or c == '!' or c == '.' or c == ',' or
        c == ':' or c == '*' or c == '_';
}
1237+
10791238
/// Parses emphasis, starting at the beginning of a run of `*` or `_`
10801239
/// characters. `ip.pos` is left at the last character in the run after
10811240
/// parsing.

lib/docs/wasm/markdown/renderer.zig

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ pub fn Renderer(comptime Writer: type, comptime Context: type) type {
140140
}
141141
try writer.writeAll("</a>");
142142
},
143+
.autolink => {
144+
const target = doc.string(data.text.content);
145+
try writer.print("<a href=\"{0}\">{0}</a>", .{fmtHtml(target)});
146+
},
143147
.image => {
144148
const target = doc.string(data.link.target);
145149
try writer.print("<img src=\"{}\" alt=\"", .{fmtHtml(target)});
@@ -215,7 +219,7 @@ pub fn renderInlineNodeText(
215219
try renderInlineNodeText(doc, child, writer);
216220
}
217221
},
218-
.code_span, .text => {
222+
.autolink, .code_span, .text => {
219223
const content = doc.string(data.text.content);
220224
try writer.print("{}", .{fmtHtml(content)});
221225
},

0 commit comments

Comments
 (0)