Skip to content

Commit 3fc4dce

Browse files
committed
refactor: Address position issues with unicode text
1 parent ebf5466 commit 3fc4dce

File tree

3 files changed

+131
-98
lines changed

3 files changed

+131
-98
lines changed

src/bors/handlers/trybuild.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use crate::github::{
2020
CommitSha, GithubUser, LabelTrigger, MergeError, PullRequest, PullRequestNumber,
2121
};
2222
use crate::permissions::PermissionType;
23-
use crate::utils::suppress_github_mentions;
23+
use crate::utils::text::suppress_github_mentions;
2424

2525
use super::deny_request;
2626
use super::has_permission;

src/utils/mod.rs

Lines changed: 1 addition & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,99 +1,3 @@
11
pub mod logging;
2+
pub mod text;
23
pub mod timing;
3-
4-
/// Converts GitHub @mentions to markdown-backticked text to prevent notifications.
5-
/// For example, "@user" becomes "`user`".
6-
///
7-
/// Handles GitHub mention formats:
8-
/// - Usernames (@username)
9-
/// - Teams (@org/team)
10-
/// - Nested teams (@org/team/subteam)
11-
///
12-
/// GitHub's nested team documentation:
13-
/// https://docs.github.com/en/organizations/organizing-members-into-teams/about-teams#nested-teams
14-
///
15-
/// Ignores email addresses and other @ symbols that don't match GitHub mention patterns.
16-
pub fn suppress_github_mentions(text: &str) -> String {
17-
if text.is_empty() || !text.contains('@') {
18-
return text.to_string();
19-
}
20-
21-
let segment = r"[A-Za-z0-9][A-Za-z0-9\-]{0,38}";
22-
let pattern = format!(r"@{0}(?:/{0})*", segment);
23-
24-
let re = regex::Regex::new(&pattern).unwrap();
25-
re.replace_all(text, |caps: &regex::Captures| {
26-
let mention = &caps[0];
27-
let position = caps.get(0).unwrap().start();
28-
29-
if !is_github_mention(text, mention, position) {
30-
return mention.to_string();
31-
}
32-
33-
let name = &mention[1..]; // Drop the @ symbol
34-
format!("`{}`", name)
35-
})
36-
.to_string()
37-
}
38-
39-
/// Determines if a potential mention would actually trigger a notification.
///
/// `pos` is the byte offset of `mention` inside `text`. The characters directly
/// before and after the mention are looked up by slicing at byte offsets, so
/// multi-byte (non-ASCII) text around the mention is handled correctly.
///
/// Returns `false` when the mention is preceded by an alphanumeric character or
/// underscore (e.g. an email address), or followed by an alphanumeric character,
/// underscore or hyphen. Returns `true` otherwise.
fn is_github_mention(text: &str, mention: &str, pos: usize) -> bool {
    let is_word = |c: char| c.is_alphanumeric() || c == '_';

    // Not a valid mention if preceded by alphanumeric or underscore (email).
    // `str::get` yields `None` instead of panicking on an invalid offset.
    let before = text.get(..pos).and_then(|head| head.chars().next_back());
    if matches!(before, Some(c) if is_word(c)) {
        return false;
    }

    // Not a valid mention if the name continues past the matched text.
    let end = pos + mention.len();
    let after = text.get(end..).and_then(|tail| tail.chars().next());
    !matches!(after, Some(c) if is_word(c) || c == '-')
}
60-
61-
#[cfg(test)]
mod tests {
    use super::*;

    /// Covers user, team and nested-team mentions plus inputs that must stay untouched.
    #[test]
    fn test_suppress_github_mentions() {
        // User mentions
        assert_eq!(suppress_github_mentions("Hello @user"), "Hello `user`");

        // Org team mentions
        assert_eq!(suppress_github_mentions("@org/team"), "`org/team`");
        assert_eq!(
            suppress_github_mentions("@org/team/subteam"),
            "`org/team/subteam`"
        );
        assert_eq!(
            suppress_github_mentions("@big/team/sub/group"),
            "`big/team/sub/group`"
        );
        assert_eq!(
            suppress_github_mentions("Thanks @user, @rust-lang/libs and @github/docs/content!"),
            "Thanks `user`, `rust-lang/libs` and `github/docs/content`!"
        );

        // Non mentions
        assert_eq!(suppress_github_mentions("@"), "@");
        assert_eq!(suppress_github_mentions(""), "");
        assert_eq!(
            suppress_github_mentions("No mentions here"),
            "No mentions here"
        );
        // Email addresses must be left as-is.
        assert_eq!(
            suppress_github_mentions("user@example.com"),
            "user@example.com"
        );

        assert_eq!(suppress_github_mentions("@user_test"), "@user_test");
    }
}

src/utils/text.rs

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
use regex::{Captures, Regex};
2+
3+
/// Replaces valid GitHub @mentions with backticks to prevent accidental pings
4+
///
5+
/// For example:
6+
/// "@user" -> "`@user`".
7+
/// "@org/team" -> "`@org/team`".
8+
/// "@org/team/subteam" -> "`@org/team/subteam`".
9+
pub fn suppress_github_mentions(text: &str) -> String {
10+
if text.is_empty() || !text.contains('@') {
11+
return text.to_string();
12+
}
13+
14+
let segment = r"[a-zA-Z0-9][a-zA-Z0-9\-]{0,38}";
15+
let pattern = format!(r"@{0}(?:/{0})*", segment);
16+
17+
let re = Regex::new(&pattern).unwrap();
18+
re.replace_all(text, |caps: &Captures| {
19+
let entry = caps.get(0).unwrap();
20+
let mention = entry.as_str();
21+
let start = entry.start();
22+
let end = entry.end();
23+
24+
if !is_valid_mention_context(text, start, end) {
25+
return mention.to_string();
26+
}
27+
28+
format!("`{mention}`")
29+
})
30+
.to_string()
31+
}
32+
33+
/// Validates mention boundaries according to GitHub's autolinking rules
///
/// `start` and `end` are byte offsets into `text` delimiting the candidate
/// mention. A mention is considered valid if:
/// 1. It is not preceded by an alphanumeric character or underscore
///    (start of string is fine)
/// 2. It is not followed by an alphanumeric character, underscore or hyphen
///    (end of string is fine)
///
/// Offsets that are out of range or not on a character boundary are treated as
/// having no neighbouring character instead of panicking.
///
/// ref: https://github.com/rust-lang/homu/pull/230
fn is_valid_mention_context(text: &str, start: usize, end: usize) -> bool {
    let is_word = |c: char| c.is_alphanumeric() || c == '_';

    // Check preceding boundary: last character of everything before the mention.
    let preceding = text.get(..start).and_then(|head| head.chars().next_back());
    if matches!(preceding, Some(c) if is_word(c)) {
        return false;
    }

    // Check following boundary: a hyphen would continue the name, so it is
    // rejected alongside word characters.
    let following = text.get(end..).and_then(|tail| tail.chars().next());
    !matches!(following, Some(c) if is_word(c) || c == '-')
}
63+
64+
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain user mentions are wrapped in backticks, keeping the '@'.
    #[test]
    fn basic_mentions() {
        assert_eq!(suppress_github_mentions("Hello @user"), "Hello `@user`");
        assert_eq!(
            suppress_github_mentions("Ping @developer"),
            "Ping `@developer`"
        );
        assert_eq!(
            suppress_github_mentions("Multiple @user1 and @user2"),
            "Multiple `@user1` and `@user2`"
        );
    }

    /// Team and nested-team mentions are wrapped as a single unit.
    #[test]
    fn team_mentions() {
        assert_eq!(suppress_github_mentions("@org/team"), "`@org/team`");
        assert_eq!(
            suppress_github_mentions("@rust-lang/libs"),
            "`@rust-lang/libs`"
        );
        assert_eq!(
            suppress_github_mentions("@org/team/subteam"),
            "`@org/team/subteam`"
        );
    }

    /// Characters adjacent to the candidate decide whether it is a mention.
    #[test]
    fn mention_boundaries() {
        // Adjacent punctuation
        assert_eq!(
            suppress_github_mentions("Hello,@user! How are you?"),
            "Hello,`@user`! How are you?"
        );

        // Email addresses
        assert_eq!(
            suppress_github_mentions("user@example.com"),
            "user@example.com"
        );

        // Invalid mentions
        assert_eq!(suppress_github_mentions("@-user"), "@-user");
        assert_eq!(suppress_github_mentions("word@user"), "word@user");
        assert_eq!(suppress_github_mentions("@user_next"), "@user_next");
    }

    /// Multi-byte characters around a mention must not shift the boundary checks.
    #[test]
    fn unicode_context() {
        assert_eq!(
            suppress_github_mentions("héllo @user"),
            "héllo `@user`"
        );
        assert_eq!(
            suppress_github_mentions("日本語 @user です"),
            "日本語 `@user` です"
        );
        assert_eq!(suppress_github_mentions("é@user"), "é@user");
        assert_eq!(suppress_github_mentions("@useré"), "@useré");
    }

    #[test]
    fn edge_cases() {
        // Empty input
        assert_eq!(suppress_github_mentions(""), "");

        // Minimum valid mention
        assert_eq!(suppress_github_mentions("@a"), "`@a`");

        // Maximum length mention
        let long_mention = "@".to_string() + &"a".repeat(39);
        assert_eq!(
            suppress_github_mentions(&long_mention),
            format!("`{long_mention}`")
        );
    }
}

0 commit comments

Comments
 (0)