From 694d2998aaddfe0ac40315f28a079429388f8ded Mon Sep 17 00:00:00 2001
From: Andrew Gallant <jamslam@gmail.com>
Date: Wed, 8 Jan 2025 12:49:10 -0500
Subject: [PATCH] fix: Remove false positive line trimming
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, it was possible for a `...` to be inserted when no trimming
was actually done. For example:

```
  |
1 |   version = "0.1.0"
2 |   # Ensure that the spans from toml handle utf-8 correctly
3 |   authors = [
  |  ___________^
4 | |     { name = "Z͑ͫ̓ͪ̂ͫ̽͏̴̙...A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘", email = 1 }
5 | | ]
  | |_^ RUF200
  |
```

After this fix, the `...` is no longer inserted:

```
  |
1 |   version = "0.1.0"
2 |   # Ensure that the spans from toml handle utf-8 correctly
3 |   authors = [
  |  ___________^
4 | |     { name = "Z͑ͫ̓ͪ̂ͫ̽͏̴̙A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘", email = 1 }
5 | | ]
  | |_^ RUF200
  |
```

This is another low confidence fix where I'm not sure that it's right.
But the implementation was previously seeming to conflate the line
length (in bytes) versus the actual rendered length. So instead of
trying to do some math to figure out whether an ellipsis should be
inserted, we just keep track of whether the code line we write was
truncated or not.
---
 src/renderer/display_list.rs | 34 +++++++++++++++++-----------------
 src/renderer/margin.rs       | 12 ------------
 tests/formatter.rs           | 30 ++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 29 deletions(-)

diff --git a/src/renderer/display_list.rs b/src/renderer/display_list.rs
index c2cb82b..da24c70 100644
--- a/src/renderer/display_list.rs
+++ b/src/renderer/display_list.rs
@@ -331,28 +331,28 @@ impl DisplaySet<'_> {
 
                     // On long lines, we strip the source line, accounting for unicode.
                     let mut taken = 0;
-                    let code: String = text
-                        .chars()
-                        .skip(left)
-                        .take_while(|ch| {
-                            // Make sure that the trimming on the right will fall within the terminal width.
-                            // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char`
-                            // is. For now, just accept that sometimes the code line will be longer than
-                            // desired.
-                            let next = unicode_width::UnicodeWidthChar::width(*ch).unwrap_or(1);
-                            if taken + next > right - left {
-                                return false;
-                            }
-                            taken += next;
-                            true
-                        })
-                        .collect();
+                    let mut was_cut_right = false;
+                    let mut code = String::new();
+                    for ch in text.chars().skip(left) {
+                        // Make sure that the trimming on the right will fall within the terminal width.
+                        // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char`
+                        // is. For now, just accept that sometimes the code line will be longer than
+                        // desired.
+                        let next = unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1);
+                        if taken + next > right - left {
+                            was_cut_right = true;
+                            break;
+                        }
+                        taken += next;
+                        code.push(ch);
+                    }
+
                     buffer.puts(line_offset, code_offset, &code, Style::new());
                     if self.margin.was_cut_left() {
                         // We have stripped some code/whitespace from the beginning, make it clear.
                         buffer.puts(line_offset, code_offset, "...", *lineno_color);
                     }
-                    if self.margin.was_cut_right(line_len) {
+                    if was_cut_right {
                         buffer.puts(line_offset, code_offset + taken - 3, "...", *lineno_color);
                     }
 
diff --git a/src/renderer/margin.rs b/src/renderer/margin.rs
index c484416..59bd550 100644
--- a/src/renderer/margin.rs
+++ b/src/renderer/margin.rs
@@ -58,18 +58,6 @@ impl Margin {
         self.computed_left > 0
     }
 
-    pub(crate) fn was_cut_right(&self, line_len: usize) -> bool {
-        let right =
-            if self.computed_right == self.span_right || self.computed_right == self.label_right {
-                // Account for the "..." padding given above. Otherwise we end up with code lines that
-                // do fit but end in "..." as if they were trimmed.
-                self.computed_right - ELLIPSIS_PASSING
-            } else {
-                self.computed_right
-            };
-        right < line_len && self.computed_left + self.term_width < line_len
-    }
-
     fn compute(&mut self, max_line_len: usize) {
         // When there's a lot of whitespace (>20), we want to trim it as it is useless.
         self.computed_left = if self.whitespace_left > LONG_WHITESPACE {
diff --git a/tests/formatter.rs b/tests/formatter.rs
index 6faab76..f8e97b9 100644
--- a/tests/formatter.rs
+++ b/tests/formatter.rs
@@ -2,6 +2,36 @@ use annotate_snippets::{Level, Renderer, Snippet};
 
 use snapbox::{assert_data_eq, str};
 
+// This tests that an ellipsis is not inserted into Unicode text when a line
+// wasn't actually trimmed.
+//
+// This is a regression test where `...` was inserted because the code wasn't
+// properly accounting for the *rendered* length versus the length in bytes in
+// all cases.
+#[test]
+fn unicode_cut_handling() {
+    let source = "version = \"0.1.0\"\n# Ensure that the spans from toml handle utf-8 correctly\nauthors = [\n    { name = \"Z\u{351}\u{36b}\u{343}\u{36a}\u{302}\u{36b}\u{33d}\u{34f}\u{334}\u{319}\u{324}\u{31e}\u{349}\u{35a}\u{32f}\u{31e}\u{320}\u{34d}A\u{36b}\u{357}\u{334}\u{362}\u{335}\u{31c}\u{330}\u{354}L\u{368}\u{367}\u{369}\u{358}\u{320}G\u{311}\u{357}\u{30e}\u{305}\u{35b}\u{341}\u{334}\u{33b}\u{348}\u{34d}\u{354}\u{339}O\u{342}\u{30c}\u{30c}\u{358}\u{328}\u{335}\u{339}\u{33b}\u{31d}\u{333}\", email = 1 }\n]\n";
+    let input = Level::Error.title("title").snippet(
+        Snippet::source(source)
+            .fold(false)
+            .annotation(Level::Error.span(85..228).label("annotation")),
+    );
+    let expected = "\
+error: title
+  |
+1 |   version = \"0.1.0\"
+2 |   # Ensure that the spans from toml handle utf-8 correctly
+3 |   authors = [
+  |  ___________^
+4 | |     { name = \"Z\u{351}\u{36b}\u{343}\u{36a}\u{302}\u{36b}\u{33d}\u{34f}\u{334}\u{319}\u{324}\u{31e}\u{349}\u{35a}\u{32f}\u{31e}\u{320}\u{34d}A\u{36b}\u{357}\u{334}\u{362}\u{335}\u{31c}\u{330}\u{354}L\u{368}\u{367}\u{369}\u{358}\u{320}G\u{311}\u{357}\u{30e}\u{305}\u{35b}\u{341}\u{334}\u{33b}\u{348}\u{34d}\u{354}\u{339}O\u{342}\u{30c}\u{30c}\u{358}\u{328}\u{335}\u{339}\u{33b}\u{31d}\u{333}\", email = 1 }
+5 | | ]
+  | |_^ annotation
+  |\
+";
+    let renderer = Renderer::plain();
+    assert_data_eq!(renderer.render(input).to_string(), expected);
+}
+
 #[test]
 fn test_i_29() {
     let snippets = Level::Error.title("oops").snippet(