Skip to content

Commit eb58cf4

Browse files
committed
Refactor: separate the embedding logic into a DocumentTransformer
Extract the common logic and optimize processing
1 parent 10e1e13 commit eb58cf4

File tree

1 file changed

+61
-0
lines changed

1 file changed

+61
-0
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Copyright 2023 - 2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.springframework.ai.transformer;
17+
18+
import java.util.List;
19+
import org.springframework.ai.document.Document;
20+
import org.springframework.ai.document.DocumentTransformer;
21+
import org.springframework.ai.embedding.EmbeddingModel;
22+
import reactor.core.publisher.Flux;
23+
import reactor.core.publisher.Mono;
24+
import reactor.core.scheduler.Schedulers;
25+
26+
/**
27+
* DocumentEmbeddingTransformer injects embedding values into each Document using the
28+
* EmbeddingModel if the Document does not already have embedding data.
29+
*
30+
* @author youngmon
31+
* @since 1.0.0
32+
*/
33+
public class DocumentEmbeddingTransformer implements DocumentTransformer {
34+
35+
private final EmbeddingModel embeddingModel;
36+
37+
public DocumentEmbeddingTransformer(EmbeddingModel embeddingModel) {
38+
this.embeddingModel = embeddingModel;
39+
}
40+
41+
/**
42+
* Embedding values are generated using the embedding model provided through the
43+
* constructor and then injected into each Document object.
44+
* @param documents to process.
45+
* @return processed documents
46+
*/
47+
@Override
48+
public List<Document> apply(List<Document> documents) {
49+
return Flux.fromIterable(documents).flatMap(document -> {
50+
if (document.getEmbedding() == null || document.getEmbedding().length == 0)
51+
return Mono
52+
.zip(Mono.just(document), Mono.fromCallable(() -> embeddingModel.embed(document)), (doc, embed) -> {
53+
doc.setEmbedding(embed);
54+
return doc;
55+
})
56+
.subscribeOn(Schedulers.boundedElastic());
57+
return Mono.just(document);
58+
}).collectList().block();
59+
}
60+
61+
}

0 commit comments

Comments
 (0)