-
Notifications
You must be signed in to change notification settings - Fork 99
/
Copy pathsanitization.rs
299 lines (249 loc) · 9.19 KB
/
sanitization.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
use std::borrow::Cow;
use pgt_text_size::TextSize;
use crate::CompletionParams;
/// Completion input after the sanitization step.
///
/// Produced from `CompletionParams` through the `From` impl in this file, which
/// either passes the input through as-is or replaces the SQL with a version that
/// has a placeholder token inserted at the cursor (and a tree parsed from it).
pub(crate) struct SanitizedCompletionParams<'a> {
    /// Cursor position, copied unchanged from the incoming params.
    pub position: TextSize,
    /// SQL text; contains the placeholder token when the text was adjusted.
    pub text: String,
    /// Schema cache reference, passed through from the incoming params.
    pub schema: &'a pgt_schema_cache::SchemaCache,
    /// Syntax tree: borrowed from the incoming params when the text was not
    /// adjusted, owned when it was re-parsed from the adjusted text.
    pub tree: Cow<'a, tree_sitter::Tree>,
}
pub fn benchmark_sanitization(params: CompletionParams) -> String {
let params: SanitizedCompletionParams = params.try_into().unwrap();
params.text
}
impl<'larger, 'smaller> From<CompletionParams<'larger>> for SanitizedCompletionParams<'smaller>
where
    'larger: 'smaller,
{
    /// Converts raw completion params into sanitized ones.
    ///
    /// The SQL is adjusted (placeholder token inserted, tree re-parsed) when any
    /// of the cursor predicates below holds; otherwise the params are passed
    /// through unchanged.
    fn from(params: CompletionParams<'larger>) -> Self {
        let (tree, position) = (params.tree, params.position);

        // Evaluated left to right with short-circuiting, same order as before.
        let needs_placeholder = cursor_inbetween_nodes(tree, position)
            || cursor_prepared_to_write_token_after_last_node(tree, position)
            || cursor_before_semicolon(tree, position);

        if needs_placeholder {
            return SanitizedCompletionParams::with_adjusted_sql(params);
        }

        SanitizedCompletionParams::unadjusted(params)
    }
}
/// Placeholder identifier that `with_adjusted_sql` inserts into the SQL at the
/// cursor position before re-parsing; `is_sanitized_token` compares against it.
static SANITIZED_TOKEN: &str = "REPLACED_TOKEN";
impl<'larger, 'smaller> SanitizedCompletionParams<'smaller>
where
    'larger: 'smaller,
{
    /// Builds params whose SQL has `SANITIZED_TOKEN` inserted at the cursor and
    /// whose tree is freshly parsed from that adjusted SQL.
    ///
    /// `position` and `schema` are passed through unchanged.
    ///
    /// # Panics
    ///
    /// Panics if the SQL grammar cannot be loaded or the parser yields no tree.
    fn with_adjusted_sql(params: CompletionParams<'larger>) -> Self {
        let cursor_pos: usize = params.position.into();
        let sql = Self::splice_sanitized_token(params.text.as_str(), cursor_pos);

        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(tree_sitter_sql::language())
            .expect("Error loading sql language");

        // Borrow the SQL for parsing; no need to clone the whole string.
        let tree = parser
            .parse(&sql, None)
            .expect("parser has a language set and no timeout, so it must return a tree");

        Self {
            position: params.position,
            text: sql,
            schema: params.schema,
            tree: Cow::Owned(tree),
        }
    }

    /// Returns `text` with `SANITIZED_TOKEN` inserted at byte offset `cursor_pos`.
    ///
    /// * Cursor at or past the end of `text`: the text is padded with spaces up
    ///   to the cursor and the token is appended.
    /// * Cursor inside `text`: the token followed by a single space is inserted
    ///   at the cursor, and the remainder of the text is kept after it.
    ///
    /// `cursor_pos` is treated as a byte offset (as tree-sitter offsets are). If
    /// it falls inside a multi-byte character, the split point is moved back to
    /// the start of that character so the slice stays valid UTF-8.
    fn splice_sanitized_token(text: &str, cursor_pos: usize) -> String {
        let mut sql =
            String::with_capacity(text.len().max(cursor_pos) + SANITIZED_TOKEN.len() + 1);

        if cursor_pos >= text.len() {
            // The cursor is outside the statement: pad with spaces until we
            // arrive at the cursor position, then add the token.
            sql.push_str(text);
            sql.extend(std::iter::repeat(' ').take(cursor_pos - text.len()));
            sql.push_str(SANITIZED_TOKEN);
        } else {
            // Offset 0 is always a char boundary, so this loop terminates.
            let mut split_at = cursor_pos;
            while !text.is_char_boundary(split_at) {
                split_at -= 1;
            }

            let (before_cursor, after_cursor) = text.split_at(split_at);
            sql.push_str(before_cursor);
            sql.push_str(SANITIZED_TOKEN);
            sql.push(' ');
            // Keep everything after the cursor so the rest of the statement is
            // still available in the re-parsed tree.
            sql.push_str(after_cursor);
        }

        sql
    }

    /// Builds params that mirror the input: same position and schema, a copy of
    /// the text, and the caller's tree borrowed rather than re-parsed.
    fn unadjusted(params: CompletionParams<'larger>) -> Self {
        Self {
            position: params.position,
            text: params.text.clone(),
            schema: params.schema,
            tree: Cow::Borrowed(params.tree),
        }
    }

    /// Returns `true` if `txt` is exactly the placeholder token.
    pub fn is_sanitized_token(txt: &str) -> bool {
        txt == SANITIZED_TOKEN
    }
}
/// Reports whether the cursor sits in the gap between two SQL nodes, i.e. it
/// does not touch the node on either side.
///
/// ```sql
/// select| from users;   -- cursor touches the `select` node: false
/// select |from users;   -- cursor touches the `from` node: false
/// select | from users;  -- cursor touches neither node: true
/// ```
///
/// A cursor outside the root node's `[start, end)` byte range is never
/// considered to be between nodes.
fn cursor_inbetween_nodes(tree: &tree_sitter::Tree, position: TextSize) -> bool {
    let offset: usize = position.into();
    let root = tree.root_node();

    // Outside the root node there is nothing to be "between".
    if !(root.start_byte()..root.end_byte()).contains(&offset) {
        return false;
    }

    // Descend for as long as the tree cursor can find a child for this offset;
    // the node we stop on is the deepest candidate for the cursor position.
    let mut walker = tree.walk();
    let mut deepest = root;
    while walker.goto_first_child_for_byte(offset).is_some() {
        deepest = walker.node();
    }

    // Touching a node includes both of its edges, so the cursor is between
    // nodes only when it lies strictly outside `[start, end]` of that node.
    offset < deepest.start_byte() || offset > deepest.end_byte()
}
/// Reports whether the cursor is exactly one byte past the end of the root
/// node, i.e. in position to start typing the next token:
///
/// ```sql
/// select * from |   -- one space after the last node: true
/// select * from|    -- still touching the last node: false
/// select * from  |  -- two spaces after the last node: false
/// ```
fn cursor_prepared_to_write_token_after_last_node(
    tree: &tree_sitter::Tree,
    position: TextSize,
) -> bool {
    let cursor_offset: usize = position.into();
    let root_end = tree.root_node().end_byte();

    root_end + 1 == cursor_offset
}
/// Reports whether the cursor sits in the gap between the end of a statement
/// and its terminating semicolon.
///
/// Returns `false` when the cursor is outside the root node's `[start, end)`
/// byte range, when it is directly on the semicolon, or when it still touches
/// the end of the preceding named node.
fn cursor_before_semicolon(tree: &tree_sitter::Tree, position: TextSize) -> bool {
    let offset: usize = position.into();
    let root = tree.root_node();

    // A cursor outside the root node cannot be in front of its semicolon.
    if offset < root.start_byte() || offset >= root.end_byte() {
        return false;
    }

    let mut walker = tree.walk();
    let mut deepest = root;
    while walker.goto_first_child_for_byte(offset).is_some() {
        deepest = walker.node();
    }

    // The semicolon node is a sibling of the statement, not part of it:
    //
    // program [0..26]
    //   statement [0..19]
    //   ; [25..26]
    //
    // Searching for an offset in the gap (e.g. 21) still lands on the `;` node,
    // so landing on it is necessary but not sufficient: the cursor must not be
    // on the semicolon itself ...
    let landed_ahead_of_semi = deepest.kind() == ";" && offset != deepest.start_byte();

    // ... and it must lie strictly after the end of the preceding named node.
    landed_ahead_of_semi
        && matches!(deepest.prev_named_sibling(), Some(prev) if prev.end_byte() < offset)
}
#[cfg(test)]
mod tests {
    use pgt_text_size::TextSize;

    use crate::sanitization::{
        cursor_before_semicolon, cursor_inbetween_nodes,
        cursor_prepared_to_write_token_after_last_node,
    };

    /// Parses `sql` with the tree-sitter SQL grammar used by the code under test.
    fn parse(sql: &str) -> tree_sitter::Tree {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(tree_sitter_sql::language())
            .expect("Error loading sql language");
        parser.parse(sql, None).expect("parser should produce a tree")
    }

    #[test]
    fn test_cursor_inbetween_nodes() {
        // note: TWO spaces between select and from.
        let input = "select  from users;";
        // The offsets below depend on the exact spacing; guard against it being
        // normalized away by tooling.
        assert_eq!(input.len(), 19, "input must keep two spaces after `select`");

        let tree = parse(input);

        // select | from users; <-- just right, one space after select token, one space before from
        assert!(cursor_inbetween_nodes(&tree, TextSize::new(7)));

        // select|  from users; <-- still on select token
        assert!(!cursor_inbetween_nodes(&tree, TextSize::new(6)));

        // select  |from users; <-- already on from token
        assert!(!cursor_inbetween_nodes(&tree, TextSize::new(8)));

        // select  from users;| <-- past the end of the root node
        assert!(!cursor_inbetween_nodes(&tree, TextSize::new(19)));
    }

    #[test]
    fn test_cursor_after_nodes() {
        let input = "select * from";
        let tree = parse(input);

        // select * from| <-- still on previous token
        assert!(!cursor_prepared_to_write_token_after_last_node(
            &tree,
            TextSize::new(13)
        ));

        // select * from  | <-- too far off, two spaces afterward
        assert!(!cursor_prepared_to_write_token_after_last_node(
            &tree,
            TextSize::new(15)
        ));

        // select * |from <-- it's within
        assert!(!cursor_prepared_to_write_token_after_last_node(
            &tree,
            TextSize::new(9)
        ));

        // select * from | <-- just right
        assert!(cursor_prepared_to_write_token_after_last_node(
            &tree,
            TextSize::new(14)
        ));
    }

    #[test]
    fn test_cursor_before_semicolon() {
        // Idx "13" is the exclusive end of `select * from` (first space after from)
        // Idx "18" is right where the semi is
        // => FIVE spaces between `from` and `;`.
        let input = "select * from     ;";
        // The offsets below depend on the exact spacing; guard against it being
        // normalized away by tooling.
        assert_eq!(input.len(), 19, "input must keep five spaces before `;`");

        let tree = parse(input);

        // select * from     ;| <-- it's after the statement
        assert!(!cursor_before_semicolon(&tree, TextSize::new(19)));

        // select * from|     ; <-- still touches the from
        assert!(!cursor_before_semicolon(&tree, TextSize::new(13)));

        // not okay to be ON the semi.
        // select * from     |;
        assert!(!cursor_before_semicolon(&tree, TextSize::new(18)));

        // anything is fine here
        // select * from |    ;
        // select * from  |   ;
        // select * from   |  ;
        // select * from    | ;
        assert!(cursor_before_semicolon(&tree, TextSize::new(14)));
        assert!(cursor_before_semicolon(&tree, TextSize::new(15)));
        assert!(cursor_before_semicolon(&tree, TextSize::new(16)));
        assert!(cursor_before_semicolon(&tree, TextSize::new(17)));
    }
}