From 7d0dd20196d4ac808951bce2d2fa55be303a993e Mon Sep 17 00:00:00 2001 From: Tomoko Uchida Date: Sun, 17 Dec 2023 19:57:25 +0900 Subject: [PATCH 1/3] expose tantivy's TermQuery --- src/lib.rs | 29 +++++++++++++++++++++++++++++ src/query.rs | 36 +++++++++++++++++++++++++++++++++++- tantivy/tantivy.pyi | 4 +++- tests/tantivy_test.py | 14 +++++++++++++- 4 files changed, 80 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 302a3218..2bf9e3ec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ use ::tantivy as tv; +use ::tantivy::schema::{Term, Value}; use pyo3::{exceptions, prelude::*, wrap_pymodule}; mod document; @@ -20,6 +21,8 @@ use schemabuilder::SchemaBuilder; use searcher::{DocAddress, Order, SearchResult, Searcher}; use snippet::{Snippet, SnippetGenerator}; +use crate::document::extract_value; + /// Python bindings for the search engine library Tantivy. /// /// Tantivy is a full text search engine library written in rust. @@ -153,3 +156,29 @@ pub(crate) fn get_field( Ok(field) } + +pub(crate) fn make_term( + schema: &tv::schema::Schema, + field_name: &str, + field_value: &PyAny, +) -> PyResult { + let field = get_field(schema, field_name)?; + let value = extract_value(field_value)?; + let term = match value { + Value::Str(text) => Term::from_field_text(field, &text), + Value::U64(num) => Term::from_field_u64(field, num), + Value::I64(num) => Term::from_field_i64(field, num), + Value::F64(num) => Term::from_field_f64(field, num), + Value::Date(d) => Term::from_field_date(field, d), + Value::Facet(facet) => Term::from_facet(field, &facet), + Value::Bool(b) => Term::from_field_bool(field, b), + Value::IpAddr(i) => Term::from_field_ip_addr(field, i), + _ => { + return Err(exceptions::PyValueError::new_err(format!( + "Can't create a term for Field `{field_name}` with value `{field_value}`." 
+ ))) + } + }; + + Ok(term) +} diff --git a/src/query.rs b/src/query.rs index ef841a0a..63fbde4d 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,4 +1,5 @@ -use pyo3::prelude::*; +use crate::{make_term, Schema}; +use pyo3::{exceptions, prelude::*, types::PyAny}; use tantivy as tv; /// Tantivy's Query @@ -18,4 +19,37 @@ impl Query { fn __repr__(&self) -> PyResult { Ok(format!("Query({:?})", self.get())) } + + /// Construct a Tantivy's TermQuery + #[staticmethod] + #[pyo3(signature = (schema, field_name, field_value, index_option = "position"))] + pub(crate) fn new_term_query( + schema: &Schema, + field_name: &str, + field_value: &PyAny, + index_option: &str, + ) -> PyResult { + make_term_query(schema, field_name, field_value, index_option) + } +} + +fn make_term_query( + schema: &Schema, + field_name: &str, + field_value: &PyAny, + index_option: &str, +) -> PyResult { + let term = make_term(&schema.inner, field_name, field_value)?; + let index_option = match index_option { + "position" => tv::schema::IndexRecordOption::WithFreqsAndPositions, + "freq" => tv::schema::IndexRecordOption::WithFreqs, + "basic" => tv::schema::IndexRecordOption::Basic, + _ => return Err(exceptions::PyValueError::new_err( + "Invalid index option, valid choices are: 'basic', 'freq' and 'position'" + )) + }; + let inner = tv::query::TermQuery::new(term, index_option); + Ok(Query { + inner: Box::new(inner), + }) } diff --git a/tantivy/tantivy.pyi b/tantivy/tantivy.pyi index 3d2c5802..acef85fb 100644 --- a/tantivy/tantivy.pyi +++ b/tantivy/tantivy.pyi @@ -189,7 +189,9 @@ class Document: class Query: - pass + @staticmethod + def new_term_query(schema: Schema, field_name: str, field_value: Any, index_option: str = "position") -> Query: + pass class Order(Enum): diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 6b4c2b40..1cb081d6 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -7,7 +7,7 @@ import pickle import pytest import tantivy -from tantivy import Document, 
Index, SchemaBuilder, SnippetGenerator +from tantivy import Document, Index, SchemaBuilder, SnippetGenerator, Query def schema(): @@ -925,3 +925,15 @@ def test_document_snippet(self, dir_index): assert first.end == 23 html_snippet = snippet.to_html() assert html_snippet == "The Old Man and the Sea" + + +class TestQuery(object): + def test_term_query(self, ram_index): + index = ram_index + query = Query.new_term_query(index.schema, "title", "sea") + + result = index.searcher().search(query, 10) + assert len(result.hits) == 1 + _, doc_address = result.hits[0] + searched_doc = index.searcher().doc(doc_address) + assert searched_doc["title"] == ["The Old Man and the Sea"] From e02b47986fc9c6f1de836ec49a9922a83e6ffd85 Mon Sep 17 00:00:00 2001 From: Tomoko Uchida Date: Tue, 19 Dec 2023 16:52:07 +0900 Subject: [PATCH 2/3] rename new_term_query() to term_query() --- src/query.rs | 2 +- tantivy/tantivy.pyi | 2 +- tests/tantivy_test.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/query.rs b/src/query.rs index 63fbde4d..61a59a1b 100644 --- a/src/query.rs +++ b/src/query.rs @@ -23,7 +23,7 @@ impl Query { /// Construct a Tantivy's TermQuery #[staticmethod] #[pyo3(signature = (schema, field_name, field_value, index_option = "position"))] - pub(crate) fn new_term_query( + pub(crate) fn term_query( schema: &Schema, field_name: &str, field_value: &PyAny, diff --git a/tantivy/tantivy.pyi b/tantivy/tantivy.pyi index acef85fb..06c5690d 100644 --- a/tantivy/tantivy.pyi +++ b/tantivy/tantivy.pyi @@ -190,7 +190,7 @@ class Document: class Query: @staticmethod - def new_term_query(schema: Schema, field_name: str, field_value: Any, index_option: str = "position") -> Query: + def term_query(schema: Schema, field_name: str, field_value: Any, index_option: str = "position") -> Query: pass diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 1cb081d6..80c17198 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -930,7 +930,7 @@ def 
test_document_snippet(self, dir_index): class TestQuery(object): def test_term_query(self, ram_index): index = ram_index - query = Query.new_term_query(index.schema, "title", "sea") + query = Query.term_query(index.schema, "title", "sea") result = index.searcher().search(query, 10) assert len(result.hits) == 1 From 68bc4668e5f86cdcb47bdcfcac31c2812bd1d50f Mon Sep 17 00:00:00 2001 From: Tomoko Uchida Date: Tue, 19 Dec 2023 16:55:43 +0900 Subject: [PATCH 3/3] directly implement query instantiation in Query.term_query() --- src/query.rs | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/src/query.rs b/src/query.rs index 61a59a1b..53da0890 100644 --- a/src/query.rs +++ b/src/query.rs @@ -29,27 +29,18 @@ impl Query { field_value: &PyAny, index_option: &str, ) -> PyResult { - make_term_query(schema, field_name, field_value, index_option) + let term = make_term(&schema.inner, field_name, field_value)?; + let index_option = match index_option { + "position" => tv::schema::IndexRecordOption::WithFreqsAndPositions, + "freq" => tv::schema::IndexRecordOption::WithFreqs, + "basic" => tv::schema::IndexRecordOption::Basic, + _ => return Err(exceptions::PyValueError::new_err( + "Invalid index option, valid choices are: 'basic', 'freq' and 'position'" + )) + }; + let inner = tv::query::TermQuery::new(term, index_option); + Ok(Query { + inner: Box::new(inner), + }) } } - -fn make_term_query( - schema: &Schema, - field_name: &str, - field_value: &PyAny, - index_option: &str, -) -> PyResult { - let term = make_term(&schema.inner, field_name, field_value)?; - let index_option = match index_option { - "position" => tv::schema::IndexRecordOption::WithFreqsAndPositions, - "freq" => tv::schema::IndexRecordOption::WithFreqs, - "basic" => tv::schema::IndexRecordOption::Basic, - _ => return Err(exceptions::PyValueError::new_err( - "Invalid index option, valid choices are: 'basic', 'freq' and 'position'" - )) - }; - let inner = 
tv::query::TermQuery::new(term, index_option); - Ok(Query { - inner: Box::new(inner), - }) -}