@@ -985,8 +985,12 @@ const InlineParser = struct {
985
985
ip .pos += 1 ;
986
986
},
987
987
']' = > try ip .parseLink (),
988
+ '<' = > try ip .parseAutolink (),
988
989
'*' , '_' = > try ip .parseEmphasis (),
989
990
'`' = > try ip .parseCodeSpan (),
991
+ 'h' = > if (ip .pos == 0 or isPreTextAutolink (ip .content [ip .pos - 1 ])) {
992
+ try ip .parseTextAutolink ();
993
+ },
990
994
else = > {},
991
995
}
992
996
}
@@ -1076,6 +1080,161 @@ const InlineParser = struct {
1076
1080
return @enumFromInt (string_top );
1077
1081
}
1078
1082
1083
/// Tries to parse a `<scheme:target>` autolink whose opening `<` is at
/// `ip.pos`. On success an `.autolink` node is recorded as a completed
/// inline and `ip.pos` ends on the closing `>`. If no autolink is found,
/// `ip.pos` is restored to the opening `<`.
fn parseAutolink(ip: *InlineParser) !void {
    const Phase = enum { start, scheme, target };

    const open_pos = ip.pos;
    var phase: Phase = .start;

    // Skip the opening `<` and scan one byte at a time.
    ip.pos += 1;
    scan: while (ip.pos < ip.content.len) : (ip.pos += 1) {
        const c = ip.content[ip.pos];
        switch (phase) {
            // The scheme must begin with an ASCII letter.
            .start => switch (c) {
                'a'...'z', 'A'...'Z' => phase = .scheme,
                else => break :scan,
            },
            // Remaining scheme characters, terminated by `:`.
            .scheme => switch (c) {
                ':' => phase = .target,
                'a'...'z', 'A'...'Z', '0'...'9', '+', '-', '.' => {},
                else => break :scan,
            },
            .target => switch (c) {
                '>' => {
                    // The text between `<` and `>` is stored verbatim;
                    // backslash escapes are not interpreted here.
                    const link_text = try ip.parent.addString(ip.content[open_pos + 1 .. ip.pos]);
                    const link_node = try ip.parent.addNode(.{
                        .tag = .autolink,
                        .data = .{ .text = .{
                            .content = link_text,
                        } },
                    });
                    // The completed inline spans both angle brackets.
                    try ip.completed_inlines.append(ip.parent.allocator, .{
                        .node = link_node,
                        .start = open_pos,
                        .len = ip.pos - open_pos + 1,
                    });
                    return;
                },
                // These characters may not appear inside an autolink.
                '<', ' ', '\t', '\n' => break :scan,
                else => {},
            },
        }
    }

    // No closing `>` was reached: rewind to the opening `<`.
    ip.pos = open_pos;
}
1128
+
1129
/// Tries to parse a bare `http://` or `https://` autolink (one not wrapped
/// in `<>`) beginning at `ip.pos`. On success an `.autolink` node is
/// recorded as a completed inline and `ip.pos` ends on the last character
/// of the link. If there is no valid link, `ip.pos` is left unchanged.
fn parseTextAutolink(ip: *InlineParser) !void {
    const link_start = ip.pos;
    const text = ip.content;
    var cursor = link_start;

    // Fixed prefix: "http", an optional "s", then "://".
    for ("http") |expected| {
        if (cursor >= text.len or text[cursor] != expected) return;
        cursor += 1;
    }
    if (cursor < text.len and text[cursor] == 's') cursor += 1;
    for ("://") |expected| {
        if (cursor >= text.len or text[cursor] != expected) return;
        cursor += 1;
    }

    // Link content runs until whitespace, an unbalanced `)`, or end of input.
    const content_start = cursor;
    var open_parens: usize = 0;
    while (cursor < text.len) : (cursor += 1) {
        switch (text[cursor]) {
            ' ', '\t', '\n' => break,
            '(' => open_parens += 1,
            ')' => {
                if (open_parens == 0) break;
                open_parens -= 1;
            },
            else => {},
        }
    }

    // Trailing punctuation is not considered part of the link.
    var link_end = cursor;
    while (link_end > content_start and isPostTextAutolink(text[link_end - 1])) {
        link_end -= 1;
    }
    // Nothing left after the "//": not a link.
    if (link_end == content_start) return;

    ip.pos = link_end;
    const link_text = try ip.parent.addString(text[link_start..link_end]);
    const link_node = try ip.parent.addNode(.{
        .tag = .autolink,
        .data = .{ .text = .{
            .content = link_text,
        } },
    });
    try ip.completed_inlines.append(ip.parent.allocator, .{
        .node = link_node,
        .start = link_start,
        .len = link_end - link_start,
    });
    // Leave the position on the final character of the link.
    ip.pos = link_end - 1;
}
1220
+
1221
/// Reports whether `c` is one of the characters accepted immediately before
/// a text autolink: space, tab, newline, `*`, `_`, or `(`.
fn isPreTextAutolink(c: u8) bool {
    for (" \t\n*_(") |allowed| {
        if (c == allowed) return true;
    }
    return false;
}
1228
+
1229
/// Reports whether `c` is trailing punctuation that may follow a text
/// autolink without being treated as part of it: `?`, `!`, `.`, `,`, `:`,
/// `*`, or `_`.
fn isPostTextAutolink(c: u8) bool {
    for ("?!.,:*_") |trailing| {
        if (c == trailing) return true;
    }
    return false;
}
1237
+
1079
1238
/// Parses emphasis, starting at the beginning of a run of `*` or `_`
1080
1239
/// characters. `ip.pos` is left at the last character in the run after
1081
1240
/// parsing.
0 commit comments