1
- use std:: path:: { Path , PathBuf } ;
1
+ use std:: path:: Path ;
2
2
use tantivy:: { schema:: Schema , IndexWriter , doc, Term } ;
3
3
use anyhow:: Result ;
4
4
use async_trait:: async_trait;
5
5
use tokio:: fs;
6
6
use tokio:: task:: spawn_blocking;
7
7
use futures:: future:: BoxFuture ;
8
8
use std:: collections:: { HashSet , HashMap } ;
9
- use crate :: { indexes:: Indexable , sync_handle :: SyncHandle } ;
9
+ use crate :: indexes:: Indexable ;
10
10
use crate :: intelligence:: { TreeSitterFile , TSLanguage } ;
11
11
use crate :: symbol:: SymbolLocations ;
12
12
use crate :: schema:: build_schema;
@@ -58,20 +58,15 @@ impl File {
58
58
59
59
#[ async_trait]
60
60
impl Indexable for File {
61
- async fn index_repository ( & self , handle : & SyncHandle , root_path : & Path , writer : & IndexWriter ) -> Result < ( ) > {
61
+ async fn index_repository ( & self , root_path : & Path , writer : & IndexWriter ) -> Result < ( ) > {
62
62
let existing_docs = load_existing_docs ( writer, & self . hash_field , & self . path_field ) ?;
63
- let gitignore = parse_gitignore ( root_path) . await ?;
64
-
65
- // Print all paths in gitignore
66
- // println!("Paths in .gitignore:");
67
- // for pattern in &patterns {
68
- // println!("{}", pattern);
69
- // }
63
+ let mut gitignore = GlobSetBuilder :: new ( ) ;
70
64
71
65
traverse_and_index_files (
72
66
root_path, writer, & self . schema , self . path_field , self . content_field ,
73
67
self . symbol_locations_field , self . symbols_field , self . line_end_indices_field ,
74
- self . lang_field , self . hash_field , self . content_insensitive_field , & existing_docs, & gitignore, root_path) . await
68
+ self . lang_field , self . hash_field , self . content_insensitive_field ,
69
+ & existing_docs, & mut gitignore, root_path) . await
75
70
}
76
71
77
72
fn schema ( & self ) -> Schema {
@@ -98,21 +93,20 @@ fn load_existing_docs(writer: &IndexWriter, hash_field: &tantivy::schema::Field,
98
93
Ok ( existing_docs)
99
94
}
100
95
101
- async fn parse_gitignore ( repo_path : & Path ) -> Result < GlobSet > {
102
- let gitignore_path = repo_path. join ( ".gitignore" ) ;
103
- let mut builder = GlobSetBuilder :: new ( ) ;
96
+ async fn parse_gitignore ( current_path : & Path , builder : & mut GlobSetBuilder ) -> Result < ( ) > {
97
+ let gitignore_path = current_path. join ( ".gitignore" ) ;
104
98
105
99
if gitignore_path. exists ( ) {
106
100
let contents = tokio:: fs:: read_to_string ( & gitignore_path) . await ?;
107
101
for line in contents. lines ( ) {
108
102
let trimmed_line = line. trim ( ) ;
109
103
if !trimmed_line. starts_with ( '#' ) && !trimmed_line. is_empty ( ) {
110
- let mut absolute_pattern = if trimmed_line. starts_with ( '/' ) {
104
+ let absolute_pattern = if trimmed_line. starts_with ( '/' ) {
111
105
// A leading '/' anchors the pattern to this directory: strip it and join the rest with the directory path
112
- repo_path . join ( trimmed_line. trim_start_matches ( '/' ) )
106
+ current_path . join ( trimmed_line. trim_start_matches ( '/' ) )
113
107
} else {
114
- // The pattern is a relative path, so we join it with the repo path
115
- repo_path . join ( trimmed_line)
108
+ // The pattern is a relative path, so we join it with the current path
109
+ current_path . join ( trimmed_line)
116
110
} ;
117
111
let pattern = absolute_pattern. to_string_lossy ( ) . replace ( "\\ " , "/" ) ;
118
112
// println!("Adding to gitignore: {}", pattern);
@@ -121,9 +115,10 @@ async fn parse_gitignore(repo_path: &Path) -> Result<GlobSet> {
121
115
}
122
116
}
123
117
124
- Ok ( builder . build ( ) ? )
118
+ Ok ( ( ) )
125
119
}
126
120
121
+
127
122
fn traverse_and_index_files < ' a > (
128
123
path : & ' a Path ,
129
124
writer : & ' a IndexWriter ,
@@ -137,10 +132,16 @@ fn traverse_and_index_files<'a>(
137
132
hash_field : tantivy:: schema:: Field ,
138
133
content_insensitive_field : tantivy:: schema:: Field , // New field
139
134
existing_docs : & ' a HashMap < String , String > ,
140
- gitignore : & ' a GlobSet ,
135
+ gitignore : & ' a mut GlobSetBuilder ,
141
136
root_path : & ' a Path ,
142
137
) -> BoxFuture < ' a , Result < ( ) > > {
143
138
Box :: pin ( async move {
139
+ // Parse .gitignore in the current directory and update the builder
140
+ parse_gitignore ( path, gitignore) . await ?;
141
+
142
+ // Build the GlobSet from the builder
143
+ let globset = gitignore. build ( ) ?;
144
+
144
145
let mut entries = fs:: read_dir ( path) . await ?;
145
146
while let Some ( entry) = entries. next_entry ( ) . await ? {
146
147
let path = entry. path ( ) ;
@@ -150,7 +151,7 @@ fn traverse_and_index_files<'a>(
150
151
let absolute_path_str = absolute_path. to_string_lossy ( ) . replace ( "\\ " , "/" ) ;
151
152
152
153
// Skip paths that match .gitignore patterns
153
- if gitignore . is_match ( & absolute_path_str) {
154
+ if globset . is_match ( & absolute_path_str) {
154
155
continue ;
155
156
}
156
157
@@ -220,16 +221,11 @@ fn traverse_and_index_files<'a>(
220
221
let last_char_byte_index = content_str. chars ( ) . map ( |c| c. len_utf8 ( ) ) . sum :: < usize > ( ) ;
221
222
line_end_indices. extend_from_slice ( & u32:: to_le_bytes ( last_char_byte_index as u32 ) ) ;
222
223
223
-
224
-
225
224
// Convert content to lower case for case-insensitive search
226
225
let content_insensitive = content_str. to_lowercase ( ) ;
227
- // println!("{}", content_insensitive);
228
226
229
227
// println!("{}", absolute_path_str);
230
228
231
-
232
-
233
229
let doc = tantivy:: doc!(
234
230
path_field => path_str,
235
231
content_field => content_str,
@@ -246,4 +242,4 @@ fn traverse_and_index_files<'a>(
246
242
}
247
243
Ok ( ( ) )
248
244
} )
249
- }
245
+ }
0 commit comments