Skip to content

Commit 63ac128

Browse files
ngxson and ggerganov authored
server : add TEI API format for /rerank endpoint (ggml-org#11942)
* server : add TEI API format for /rerank endpoint

* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <[email protected]>

* fix

* also gitignore examples/server/*.gz.hpp

---------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 5137da7 commit 63ac128

File tree

4 files changed

+88
-28
lines changed

4 files changed

+88
-28
lines changed

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,7 @@ examples/server/*.css.hpp
9898
examples/server/*.html.hpp
9999
examples/server/*.js.hpp
100100
examples/server/*.mjs.hpp
101+
examples/server/*.gz.hpp
101102
!build_64.sh
102103
!examples/*.bat
103104
!examples/*/*.kts

examples/server/server.cpp

+13-2
Original file line number | Diff line number | Diff line change
@@ -4263,6 +4263,11 @@ int main(int argc, char ** argv) {
42634263
// return;
42644264
//}
42654265

4266+
// if true, use TEI API format, otherwise use Jina API format
4267+
// Jina: https://jina.ai/reranker/
4268+
// TEI: https://huggingface.github.io/text-embeddings-inference/#/Text%20Embeddings%20Inference/rerank
4269+
bool is_tei_format = body.contains("texts");
4270+
42664271
json query;
42674272
if (body.count("query") == 1) {
42684273
query = body.at("query");
@@ -4275,7 +4280,8 @@ int main(int argc, char ** argv) {
42754280
return;
42764281
}
42774282

4278-
std::vector<std::string> documents = json_value(body, "documents", std::vector<std::string>());
4283+
std::vector<std::string> documents = json_value(body, "documents",
4284+
json_value(body, "texts", std::vector<std::string>()));
42794285
if (documents.empty()) {
42804286
res_error(res, format_error_response("\"documents\" must be a non-empty string array", ERROR_TYPE_INVALID_REQUEST));
42814287
return;
@@ -4320,7 +4326,12 @@ int main(int argc, char ** argv) {
43204326
}
43214327

43224328
// write JSON response
4323-
json root = format_response_rerank(body, responses);
4329+
json root = format_response_rerank(
4330+
body,
4331+
responses,
4332+
is_tei_format,
4333+
documents);
4334+
43244335
res_ok(res, root);
43254336
};
43264337

examples/server/tests/unit/test_rerank.py

+32-6
Original file line number | Diff line number | Diff line change
@@ -10,17 +10,20 @@ def create_server():
1010
server = ServerPreset.jina_reranker_tiny()
1111

1212

13+
TEST_DOCUMENTS = [
14+
"A machine is a physical system that uses power to apply forces and control movement to perform an action. The term is commonly applied to artificial devices, such as those employing engines or motors, but also to natural biological macromolecules, such as molecular machines.",
15+
"Learning is the process of acquiring new understanding, knowledge, behaviors, skills, values, attitudes, and preferences. The ability to learn is possessed by humans, non-human animals, and some machines; there is also evidence for some kind of learning in certain plants.",
16+
"Machine learning is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data, and thus perform tasks without explicit instructions.",
17+
"Paris, capitale de la France, est une grande ville européenne et un centre mondial de l'art, de la mode, de la gastronomie et de la culture. Son paysage urbain du XIXe siècle est traversé par de larges boulevards et la Seine."
18+
]
19+
20+
1321
def test_rerank():
1422
global server
1523
server.start()
1624
res = server.make_request("POST", "/rerank", data={
1725
"query": "Machine learning is",
18-
"documents": [
19-
"A machine is a physical system that uses power to apply forces and control movement to perform an action. The term is commonly applied to artificial devices, such as those employing engines or motors, but also to natural biological macromolecules, such as molecular machines.",
20-
"Learning is the process of acquiring new understanding, knowledge, behaviors, skills, values, attitudes, and preferences. The ability to learn is possessed by humans, non-human animals, and some machines; there is also evidence for some kind of learning in certain plants.",
21-
"Machine learning is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data, and thus perform tasks without explicit instructions.",
22-
"Paris, capitale de la France, est une grande ville européenne et un centre mondial de l'art, de la mode, de la gastronomie et de la culture. Son paysage urbain du XIXe siècle est traversé par de larges boulevards et la Seine."
23-
]
26+
"documents": TEST_DOCUMENTS,
2427
})
2528
assert res.status_code == 200
2629
assert len(res.body["results"]) == 4
@@ -38,6 +41,29 @@ def test_rerank():
3841
assert least_relevant["index"] == 3
3942

4043

44+
def test_rerank_tei_format():
45+
global server
46+
server.start()
47+
res = server.make_request("POST", "/rerank", data={
48+
"query": "Machine learning is",
49+
"texts": TEST_DOCUMENTS,
50+
})
51+
assert res.status_code == 200
52+
assert len(res.body) == 4
53+
54+
most_relevant = res.body[0]
55+
least_relevant = res.body[0]
56+
for doc in res.body:
57+
if doc["score"] > most_relevant["score"]:
58+
most_relevant = doc
59+
if doc["score"] < least_relevant["score"]:
60+
least_relevant = doc
61+
62+
assert most_relevant["score"] > least_relevant["score"]
63+
assert most_relevant["index"] == 2
64+
assert least_relevant["index"] == 3
65+
66+
4167
@pytest.mark.parametrize("documents", [
4268
[],
4369
None,

examples/server/utils.hpp

+42-20
Original file line number | Diff line number | Diff line change
@@ -737,28 +737,50 @@ static json format_embeddings_response_oaicompat(const json & request, const jso
737737
return res;
738738
}
739739

740-
static json format_response_rerank(const json & request, const json & ranks) {
741-
json data = json::array();
742-
int32_t n_tokens = 0;
743-
int i = 0;
744-
for (const auto & rank : ranks) {
745-
data.push_back(json{
746-
{"index", i++},
747-
{"relevance_score", json_value(rank, "score", 0.0)},
748-
});
740+
static json format_response_rerank(
741+
const json & request,
742+
const json & ranks,
743+
bool is_tei_format,
744+
std::vector<std::string> & texts) {
745+
json res;
746+
if (is_tei_format) {
747+
// TEI response format
748+
res = json::array();
749+
bool return_text = json_value(request, "return_text", false);
750+
for (const auto & rank : ranks) {
751+
int index = json_value(rank, "index", 0);
752+
json elem = json{
753+
{"index", index},
754+
{"score", json_value(rank, "score", 0.0)},
755+
};
756+
if (return_text) {
757+
elem["text"] = std::move(texts[index]);
758+
}
759+
res.push_back(elem);
760+
}
761+
} else {
762+
// Jina response format
763+
json results = json::array();
764+
int32_t n_tokens = 0;
765+
for (const auto & rank : ranks) {
766+
results.push_back(json{
767+
{"index", json_value(rank, "index", 0)},
768+
{"relevance_score", json_value(rank, "score", 0.0)},
769+
});
749770

750-
n_tokens += json_value(rank, "tokens_evaluated", 0);
751-
}
771+
n_tokens += json_value(rank, "tokens_evaluated", 0);
772+
}
752773

753-
json res = json {
754-
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
755-
{"object", "list"},
756-
{"usage", json {
757-
{"prompt_tokens", n_tokens},
758-
{"total_tokens", n_tokens}
759-
}},
760-
{"results", data}
761-
};
774+
res = json{
775+
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
776+
{"object", "list"},
777+
{"usage", json{
778+
{"prompt_tokens", n_tokens},
779+
{"total_tokens", n_tokens}
780+
}},
781+
{"results", results}
782+
};
783+
}
762784

763785
return res;
764786
}

0 commit comments

Comments
 (0)