Skip to content

Commit b2e7ef9

Browse files
committed
added get hoverable ranges and case-insensitive text search
1 parent ded7b1d commit b2e7ef9

File tree

9 files changed

+245
-591
lines changed

9 files changed

+245
-591
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
/target
22
/index
3-
/test_files
3+
/test_files
4+
.aider*

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ zstd = "=0.12.4"
7979
zstd-safe = "=6.0.6"
8080

8181
zstd-sys = "=2.0.9+zstd.1.5.5"
82+
serde_json = "1.0.117"
8283

8384
[dev-dependencies]
8485
pretty_assertions = "1.4.0"

src/file.rs

+23-27
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
use std::path::{Path, PathBuf};
1+
use std::path::Path;
22
use tantivy::{schema::Schema, IndexWriter, doc, Term};
33
use anyhow::Result;
44
use async_trait::async_trait;
55
use tokio::fs;
66
use tokio::task::spawn_blocking;
77
use futures::future::BoxFuture;
88
use std::collections::{HashSet, HashMap};
9-
use crate::{indexes::Indexable, sync_handle::SyncHandle};
9+
use crate::indexes::Indexable;
1010
use crate::intelligence::{TreeSitterFile, TSLanguage};
1111
use crate::symbol::SymbolLocations;
1212
use crate::schema::build_schema;
@@ -58,20 +58,15 @@ impl File {
5858

5959
#[async_trait]
6060
impl Indexable for File {
61-
async fn index_repository(&self, handle: &SyncHandle, root_path: &Path, writer: &IndexWriter) -> Result<()> {
61+
async fn index_repository(&self, root_path: &Path, writer: &IndexWriter) -> Result<()> {
6262
let existing_docs = load_existing_docs(writer, &self.hash_field, &self.path_field)?;
63-
let gitignore = parse_gitignore(root_path).await?;
64-
65-
// Print all paths in gitignore
66-
// println!("Paths in .gitignore:");
67-
// for pattern in &patterns {
68-
// println!("{}", pattern);
69-
// }
63+
let mut gitignore = GlobSetBuilder::new();
7064

7165
traverse_and_index_files(
7266
root_path, writer, &self.schema, self.path_field, self.content_field,
7367
self.symbol_locations_field, self.symbols_field, self.line_end_indices_field,
74-
self.lang_field, self.hash_field, self.content_insensitive_field, &existing_docs, &gitignore, root_path).await
68+
self.lang_field, self.hash_field, self.content_insensitive_field,
69+
&existing_docs, &mut gitignore, root_path).await
7570
}
7671

7772
fn schema(&self) -> Schema {
@@ -98,21 +93,20 @@ fn load_existing_docs(writer: &IndexWriter, hash_field: &tantivy::schema::Field,
9893
Ok(existing_docs)
9994
}
10095

101-
async fn parse_gitignore(repo_path: &Path) -> Result<GlobSet> {
102-
let gitignore_path = repo_path.join(".gitignore");
103-
let mut builder = GlobSetBuilder::new();
96+
async fn parse_gitignore(current_path: &Path, builder: &mut GlobSetBuilder) -> Result<()> {
97+
let gitignore_path = current_path.join(".gitignore");
10498

10599
if gitignore_path.exists() {
106100
let contents = tokio::fs::read_to_string(&gitignore_path).await?;
107101
for line in contents.lines() {
108102
let trimmed_line = line.trim();
109103
if !trimmed_line.starts_with('#') && !trimmed_line.is_empty() {
110-
let mut absolute_pattern = if trimmed_line.starts_with('/') {
104+
let absolute_pattern = if trimmed_line.starts_with('/') {
111105
// A leading '/' anchors the pattern to the directory holding this .gitignore, so strip it and join the rest onto that directory
112-
repo_path.join(trimmed_line.trim_start_matches('/'))
106+
current_path.join(trimmed_line.trim_start_matches('/'))
113107
} else {
114-
// The pattern is a relative path, so we join it with the repo path
115-
repo_path.join(trimmed_line)
108+
// The pattern is a relative path, so we join it with the current path
109+
current_path.join(trimmed_line)
116110
};
117111
let pattern = absolute_pattern.to_string_lossy().replace("\\", "/");
118112
// println!("Adding to gitignore: {}", pattern);
@@ -121,9 +115,10 @@ async fn parse_gitignore(repo_path: &Path) -> Result<GlobSet> {
121115
}
122116
}
123117

124-
Ok(builder.build()?)
118+
Ok(())
125119
}
126120

121+
127122
fn traverse_and_index_files<'a>(
128123
path: &'a Path,
129124
writer: &'a IndexWriter,
@@ -137,10 +132,16 @@ fn traverse_and_index_files<'a>(
137132
hash_field: tantivy::schema::Field,
138133
content_insensitive_field: tantivy::schema::Field, // New field
139134
existing_docs: &'a HashMap<String, String>,
140-
gitignore: &'a GlobSet,
135+
gitignore: &'a mut GlobSetBuilder,
141136
root_path: &'a Path,
142137
) -> BoxFuture<'a, Result<()>> {
143138
Box::pin(async move {
139+
// Parse .gitignore in the current directory and update the builder
140+
parse_gitignore(path, gitignore).await?;
141+
142+
// Build the GlobSet from the builder
143+
let globset = gitignore.build()?;
144+
144145
let mut entries = fs::read_dir(path).await?;
145146
while let Some(entry) = entries.next_entry().await? {
146147
let path = entry.path();
@@ -150,7 +151,7 @@ fn traverse_and_index_files<'a>(
150151
let absolute_path_str = absolute_path.to_string_lossy().replace("\\", "/");
151152

152153
// Skip paths that match .gitignore patterns
153-
if gitignore.is_match(&absolute_path_str) {
154+
if globset.is_match(&absolute_path_str) {
154155
continue;
155156
}
156157

@@ -220,16 +221,11 @@ fn traverse_and_index_files<'a>(
220221
let last_char_byte_index = content_str.chars().map(|c| c.len_utf8()).sum::<usize>();
221222
line_end_indices.extend_from_slice(&u32::to_le_bytes(last_char_byte_index as u32));
222223

223-
224-
225224
// Convert content to lower case for case-insensitive search
226225
let content_insensitive = content_str.to_lowercase();
227-
// println!("{}", content_insensitive);
228226

229227
// println!("{}", absolute_path_str);
230228

231-
232-
233229
let doc = tantivy::doc!(
234230
path_field => path_str,
235231
content_field => content_str,
@@ -246,4 +242,4 @@ fn traverse_and_index_files<'a>(
246242
}
247243
Ok(())
248244
})
249-
}
245+
}

src/indexes.rs

+5-6
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ use anyhow::{Context, Result};
33
use async_trait::async_trait;
44
use tantivy::{schema::Schema, Index, IndexReader, IndexWriter};
55
use tokio::sync::Mutex;
6-
use crate::{sync_handle::SyncHandle, file::File};
6+
use crate::file::File;
77

88
#[async_trait]
99
pub trait Indexable: Send + Sync {
10-
async fn index_repository(&self, handle: &SyncHandle, root_path: &Path, writer: &IndexWriter) -> Result<()>;
10+
async fn index_repository(&self, root_path: &Path, writer: &IndexWriter) -> Result<()>;
1111
fn schema(&self) -> Schema;
1212
}
1313

@@ -18,8 +18,8 @@ pub struct IndexWriteHandle<'a> {
1818
}
1919

2020
impl<'a> IndexWriteHandle<'a> {
21-
pub async fn index(&self, handle: &SyncHandle, root_path: &Path) -> Result<()> {
22-
self.source.index_repository(handle, root_path, &self.writer).await
21+
pub async fn index(&self, root_path: &Path) -> Result<()> {
22+
self.source.index_repository(root_path, &self.writer).await
2323
}
2424

2525
pub fn commit(&mut self) -> Result<()> {
@@ -105,9 +105,8 @@ impl Indexes {
105105

106106
pub async fn index(&self, root_path: &Path) -> Result<()> {
107107
let _write_lock = self.write_mutex.lock().await;
108-
let handle = SyncHandle::default();
109108
let mut writer = self.file.write_handle()?;
110-
writer.index(&handle, root_path).await?;
109+
writer.index( root_path).await?;
111110
writer.commit()?;
112111
Ok(())
113112
}

src/intelligence/code_navigation.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use crate::{
66
content_document::ContentDocument,
77
snippet::{Snipper, Snippet},
88
text_range::TextRange,
9-
intelligence::{TreeSitterFile, NodeKind},
9+
intelligence::NodeKind,
1010
};
1111

1212
#[derive(Debug, Serialize)]

src/main.rs

+17-10
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
use anyhow::Result;
22
use std::path::Path;
33
use retreival::{indexes::Indexes, search::Searcher};
4+
use serde_json::json;
45

56
#[tokio::main]
67
async fn main() -> Result<()> {
7-
let root_path = Path::new("/Users/arnav/Desktop/r2/retreival");
8+
let root_path = Path::new("/Users/arnav/Desktop/devon/Devon");
89
// println!("{}", root_path.display());
9-
let index_path = Path::new("/Users/arnav/Desktop/r2/retreival/index");
10+
let index_path = Path::new("/Users/arnav/Desktop/devon/Devon/index");
1011

1112
let buffer_size_per_thread = 15_000_000;
1213
let num_threads = 4;
@@ -16,17 +17,23 @@ async fn main() -> Result<()> {
1617

1718
// // // // Create a searcher and perform a search
1819
let searcher = Searcher::new(&index_path)?;
19-
// let result = searcher.token_info("rust", "/Users/arnav/Desktop/r2/retreival/src/main.rs", 18, 19, 27);
20-
// println!("{:?}", result);
21-
// // // // let results = searcher.load_all_documents("rust");
22-
let result = searcher.text_search("indexes")?;
20+
21+
// let result = searcher.token_info("/Users/arnav/Desktop/r2/retreival/src/main.rs", 14, 18, 25);
22+
// match result {
23+
// Ok(token_info) => println!("{}", retreival::search::Searcher::format_token_info(token_info)),
24+
// Err(e) => println!("Error retrieving token info: {}", e),
25+
// }
26+
27+
let result = searcher.text_search("Agent", true)?;
28+
println!("{}", retreival::search::Searcher::format_search_results(result));
29+
30+
// let result = searcher.get_hoverable_ranges("/Users/arnav/Desktop/devon/Devon/devon_agent/tools/edittools.py")?;
31+
// println!("{}", json!(retreival::search::Searcher::format_hoverable_ranges(result)).to_string());
2332

2433
// println!("-");
2534
// // Print out the results
26-
for resul in result {
27-
println!("{:?}", resul.path);
28-
// println!("{:?}", resul.context);
29-
}
35+
36+
3037

3138

3239
Ok(())

0 commit comments

Comments
 (0)