@@ -43,42 +43,37 @@ impl WebCrawlerJob {
43
43
let mut num_docs = 0 ;
44
44
45
45
// attempt to fetch the LLMS file using crawler_llms.
46
- match crawler_llms ( & self . url ) . await {
47
- Ok ( docs) => {
48
- logkit:: info!(
49
- "Fetched and split llms-full.txt successfully. Indexing {} sections." ,
50
- docs. len( )
51
- ) ;
52
- // Index each section separately.
53
- for doc in docs {
54
- let source_doc = StructuredDoc {
55
- source_id : self . source_id . clone ( ) ,
56
- fields : StructuredDocFields :: Web ( StructuredDocWebFields {
57
- title : doc. metadata . title . unwrap_or_default ( ) ,
58
- link : doc. url ,
59
- body : doc. markdown ,
60
- } ) ,
61
- } ;
62
-
63
- if indexer
64
- . presync ( & StructuredDocState {
65
- id : source_doc. id ( ) . to_string ( ) ,
66
- updated_at : Utc :: now ( ) ,
67
- deleted : false ,
68
- } )
69
- . await
70
- {
71
- indexer. sync ( source_doc) . await ;
72
- num_docs += 1 ;
73
- }
46
+ if let Ok ( docs) = crawler_llms ( & self . url ) . await {
47
+ logkit:: info!(
48
+ "Fetched and split llms-full.txt successfully. Indexing {} sections." ,
49
+ docs. len( )
50
+ ) ;
51
+ // Index each section separately.
52
+ for doc in docs {
53
+ let source_doc = StructuredDoc {
54
+ source_id : self . source_id . clone ( ) ,
55
+ fields : StructuredDocFields :: Web ( StructuredDocWebFields {
56
+ title : doc. metadata . title . unwrap_or_default ( ) ,
57
+ link : doc. url ,
58
+ body : doc. markdown ,
59
+ } ) ,
60
+ } ;
61
+
62
+ if indexer
63
+ . presync ( & StructuredDocState {
64
+ id : source_doc. id ( ) . to_string ( ) ,
65
+ updated_at : Utc :: now ( ) ,
66
+ deleted : false ,
67
+ } )
68
+ . await
69
+ {
70
+ indexer. sync ( source_doc) . await ;
71
+ num_docs += 1 ;
74
72
}
75
- indexer. commit ( ) ;
76
- logkit:: info!( "Indexed {} documents from '{}'" , num_docs, self . url) ;
77
- return Ok ( ( ) ) ;
78
- }
79
- Err ( _) => {
80
- logkit:: info!( "/llms-full.txt is not available" ) ;
81
73
}
74
+ indexer. commit ( ) ;
75
+ logkit:: info!( "Indexed {} documents from '{}'" , num_docs, self . url) ;
76
+ return Ok ( ( ) ) ;
82
77
}
83
78
84
79
// if no LLMS file was found, use the regular crawl_pipeline.
0 commit comments