Skip to content

Commit bbf5061

Browse files
farooqkz and Simon-Laux authored
Parse IRI links (#57)
* upgrade rust toolchain to 1.77.2
* don't parse internal markdown links. see #66 in the repo for details
* move parenthesis, bracket and angle parsing into dedicated function
* fix parenthesis in target of labeled link
---------
Co-authored-by: Simon Laux <[email protected]>
1 parent 2d03478 commit bbf5061

33 files changed

+2206
-872
lines changed

.github/workflows/ci.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
- uses: actions-rs/toolchain@v1
1818
with:
1919
profile: minimal
20-
toolchain: 1.64.0
20+
toolchain: 1.77.2
2121
override: true
2222
- run: rustup component add rustfmt
2323
- uses: actions-rs/cargo@v1
@@ -31,7 +31,7 @@ jobs:
3131
- uses: actions/checkout@v2
3232
- uses: actions-rs/toolchain@v1
3333
with:
34-
toolchain: 1.64.0
34+
toolchain: 1.77.2
3535
components: clippy
3636
override: true
3737
- uses: actions-rs/clippy-check@v1
@@ -68,9 +68,9 @@ jobs:
6868
matrix:
6969
include:
7070
- os: ubuntu-latest
71-
rust: 1.64.0
71+
rust: 1.77.2
7272
- os: windows-latest
73-
rust: 1.64.0
73+
rust: 1.77.2
7474
runs-on: ${{ matrix.os }}
7575
steps:
7676
- uses: actions/checkout@master

benches/moar_links.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Let's add some more links just for testing and benching:
2+
3+
these are some IPv6 links:
4+
5+
gopher://[::1]/
6+
https://[::1]/سلام
7+
https://[2345:0425:2CA1:0000:0000:0567:5673:23b5]/hello_world
8+
https://[2345:425:2CA1:0:0:0567:5673:23b5]/hello_world
9+
10+
an IPvfuture link:
11+
ftp://mrchickenkiller@[vA.A]/var/log/boot.log
12+
13+
some normal links:
14+
15+
https://www.ietf.org/rfc/rfc3987.txt
16+
https://iamb.chat/messages/index.html
17+
https://github.com/deltachat/message-parser/issues/67
18+
https://far.chickenkiller.com
19+
gopher://republic.circumlunar.space
20+
https://far.chickenkiller.com/religion/a-god-who-does-not-care/

benches/my_benchmark.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2-
use deltachat_message_parser::parser::{parse_desktop_set, parse_markdown_text, parse_only_text};
2+
use deltachat_message_parser::parser::{
3+
parse_desktop_set, parse_markdown_text, parse_only_text, LinkDestination,
4+
};
35

46
pub fn criterion_benchmark(c: &mut Criterion) {
57
let testdata = include_str!("testdata.md");
68
let lorem_ipsum_txt = include_str!("lorem_ipsum.txt");
79
let r10s_update_message = include_str!("r10s_update_message.txt");
10+
let links = include_str!("moar_links.txt");
811

912
c.bench_function("only_text_lorem_ipsum.txt", |b| {
1013
b.iter(|| parse_only_text(black_box(lorem_ipsum_txt)))
@@ -35,6 +38,10 @@ pub fn criterion_benchmark(c: &mut Criterion) {
3538
c.bench_function("markdown_r10s_update_message.txt", |b| {
3639
b.iter(|| parse_markdown_text(black_box(r10s_update_message)))
3740
});
41+
42+
c.bench_function("parse_link_moar_links.txt", |b| {
43+
b.iter(|| LinkDestination::parse(black_box(links)))
44+
});
3845
}
3946

4047
criterion_group!(benches, criterion_benchmark);

benches/testdata.md

Lines changed: 760 additions & 0 deletions
Large diffs are not rendered by default.

message_parser_wasm/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ pub fn parse_text(s: &str, enable_markdown: bool) -> JsValue {
2323
serde_wasm_bindgen::to_value(&ast).expect("Element converts to JsValue")
2424
}
2525

26-
/// parses text to json AST (text elements and labled links, to replicate current desktop implementation)
26+
/// parses text to json AST (text elements and labeled links, to replicate current desktop implementation)
2727
#[wasm_bindgen]
2828
pub fn parse_desktop_set(s: &str) -> JsValue {
2929
serde_wasm_bindgen::to_value(&deltachat_message_parser::parser::parse_desktop_set(s))

rust-toolchain

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.64.0
1+
1.77.2

spec.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ Make email addresses clickable, opens the chat with that contact and creates it
4141
Make URLs clickable.
4242

4343
- detect all valid hyperlink URLs that have the `://` (protocol://host).
44+
- according to [RFC3987](https://www.rfc-editor.org/rfc/rfc3987) (IRIs) and [RFC3986](https://www.rfc-editor.org/rfc/rfc3986) (URIs)
4445

4546
- other links like `mailto:` (note there is just a single `:`, no `://`) will get separate parsing that includes a whitelisted protocol name; otherwise there will likely be unexpected behavior, e.g. if the user types `hello:world`, it would be recognized as a link.
4647

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
clippy::get_last_with_len,
1010
clippy::get_unwrap,
1111
clippy::get_unwrap,
12-
clippy::integer_arithmetic,
12+
clippy::arithmetic_side_effects,
1313
clippy::match_on_vec_items,
1414
clippy::match_wild_err_arm,
1515
clippy::missing_panics_doc,

0 commit comments

Comments
 (0)