Skip to content

Commit d3d077a

Browse files
authored
chore(web-crawler): drop unnecessary warn llms-full.txt not available (#4085)
1 parent c51495d commit d3d077a

File tree

1 file changed

+29
-34
lines changed

1 file changed

+29
-34
lines changed

ee/tabby-webserver/src/service/background_job/web_crawler.rs

+29 -34
Original file line number | Diff line number | Diff line change
@@ -43,42 +43,37 @@ impl WebCrawlerJob {
43 43
let mut num_docs = 0;
44 44

45 45
// attempt to fetch the LLMS file using crawler_llms.
46-
match crawler_llms(&self.url).await {
47-
Ok(docs) => {
48-
logkit::info!(
49-
"Fetched and split llms-full.txt successfully. Indexing {} sections.",
50-
docs.len()
51-
);
52-
// Index each section separately.
53-
for doc in docs {
54-
let source_doc = StructuredDoc {
55-
source_id: self.source_id.clone(),
56-
fields: StructuredDocFields::Web(StructuredDocWebFields {
57-
title: doc.metadata.title.unwrap_or_default(),
58-
link: doc.url,
59-
body: doc.markdown,
60-
}),
61-
};
62-
63-
if indexer
64-
.presync(&StructuredDocState {
65-
id: source_doc.id().to_string(),
66-
updated_at: Utc::now(),
67-
deleted: false,
68-
})
69-
.await
70-
{
71-
indexer.sync(source_doc).await;
72-
num_docs += 1;
73-
}
46+
if let Ok(docs) = crawler_llms(&self.url).await {
47+
logkit::info!(
48+
"Fetched and split llms-full.txt successfully. Indexing {} sections.",
49+
docs.len()
50+
);
51+
// Index each section separately.
52+
for doc in docs {
53+
let source_doc = StructuredDoc {
54+
source_id: self.source_id.clone(),
55+
fields: StructuredDocFields::Web(StructuredDocWebFields {
56+
title: doc.metadata.title.unwrap_or_default(),
57+
link: doc.url,
58+
body: doc.markdown,
59+
}),
60+
};
61+
62+
if indexer
63+
.presync(&StructuredDocState {
64+
id: source_doc.id().to_string(),
65+
updated_at: Utc::now(),
66+
deleted: false,
67+
})
68+
.await
69+
{
70+
indexer.sync(source_doc).await;
71+
num_docs += 1;
74 72
}
75-
indexer.commit();
76-
logkit::info!("Indexed {} documents from '{}'", num_docs, self.url);
77-
return Ok(());
78-
}
79-
Err(_) => {
80-
logkit::info!("/llms-full.txt is not available");
81 73
}
74+
indexer.commit();
75+
logkit::info!("Indexed {} documents from '{}'", num_docs, self.url);
76+
return Ok(());
82 77
}
83 78

84 79
// if no LLMS file was found, use the regular crawl_pipeline.

0 commit comments

Comments
 (0)