diff --git a/docs/reference/rest-api.md b/docs/reference/rest-api.md index e356d6143e4..c464f19cf64 100644 --- a/docs/reference/rest-api.md +++ b/docs/reference/rest-api.md @@ -334,6 +334,12 @@ Updates the configurations of an index. This endpoint follows PUT semantics, whi - The indexing settings update is automatically picked up by the indexer nodes once the control plane emits a new indexing plan. - The doc mapping update is automatically picked up by the indexer nodes once the control plane emit a new indexing plan. +:::warning + +If you use the ingest or ES bulk API (V2), the old doc mapping will still be used to validate new documents that end up being persisted on existing shards (see [#5738](https://github.com/quickwit-oss/quickwit/issues/5738)). + +::: + Updating the doc mapping doesn't reindex existing data. Queries and results are mapped on a best-effort basis when querying older splits. For more details, check [the reference](updating-mapper.md) out. #### PUT payload diff --git a/docs/reference/updating-mapper.md b/docs/reference/updating-mapper.md index 2341215244b..6b96e276e6b 100644 --- a/docs/reference/updating-mapper.md +++ b/docs/reference/updating-mapper.md @@ -6,6 +6,12 @@ Quickwit allows updating the mapping it uses to add more fields to an existing i When you update a doc mapping for an index, Quickwit will restart indexing pipelines to take the changes into account. As both this operation and the document ingestion are asynchronous, there is no strict happens-before relationship between ingestion and update. This means a document ingested just before the update may be indexed according to the newer doc mapper, and document ingested just after the update may be indexed with the older doc mapper. +:::warning + +If you use the ingest or ES bulk API (V2), the old doc mapping will still be used to validate new documents that end up being persisted on existing shards (see [#5738](https://github.com/quickwit-oss/quickwit/issues/5738)). 
+ +::: + ## Querying Quickwit always validate queries against the most recent mapping. diff --git a/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs b/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs index 5908fed5612..980cf884822 100644 --- a/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs +++ b/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs @@ -12,9 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::fmt::Write; use std::time::Duration; use quickwit_config::service::QuickwitService; +use quickwit_rest_client::models::IngestSource; +use quickwit_rest_client::rest_client::CommitType; use serde_json::{json, Value}; use super::assert_hits_unordered; @@ -30,7 +33,6 @@ async fn validate_search_across_doc_mapping_updates( ingest_after_update: &[Value], query_and_expect: &[(&str, Result<&[Value], ()>)], ) { - quickwit_common::setup_logging_for_tests(); let sandbox = ClusterSandboxBuilder::build_and_start_standalone().await; { @@ -579,3 +581,131 @@ async fn test_update_doc_mapping_add_field_on_strict() { ) .await; } + +#[tokio::test] +#[ignore] +// TODO(#5738): un-ignore once docs on existing shards are validated with the updated mapping. +async fn test_update_doc_validation() { + quickwit_common::setup_logging_for_tests(); + let index_id = "update-doc-validation"; + let sandbox = ClusterSandboxBuilder::default() + .add_node([ + QuickwitService::Searcher, + QuickwitService::Metastore, + QuickwitService::Indexer, + QuickwitService::ControlPlane, + QuickwitService::Janitor, + ]) + .build_and_start() + .await; + + { + // Give the indexer time to fully start before reading its counters. + // Startup takes a while for a cluster, hence the fixed 3-second sleep. 
+ tokio::time::sleep(Duration::from_secs(3)).await; + let indexing_service_counters = sandbox + .rest_client(QuickwitService::Indexer) + .node_stats() + .indexing() + .await + .unwrap(); + assert_eq!(indexing_service_counters.num_running_pipelines, 0); + } + + // Create the index with `body` mapped as `u64`. + sandbox + .rest_client(QuickwitService::Indexer) + .indexes() + .create( + json!({ + "version": "0.8", + "index_id": index_id, + "doc_mapping": { + "field_mappings": [ + {"name": "body", "type": "u64"} + ] + }, + "indexing_settings": { + "commit_timeout_secs": 1 + }, + }) + .to_string(), + quickwit_config::ConfigFormat::Json, + false, + ) + .await + .unwrap(); + + assert!(sandbox + .rest_client(QuickwitService::Indexer) + .node_health() + .is_live() + .await + .unwrap()); + + // Wait until indexing pipelines are started. + sandbox.wait_for_indexing_pipelines(1).await.unwrap(); + + let unsigned_payload = (0..20).fold(String::new(), |mut buffer, id| { + writeln!(&mut buffer, "{{\"body\": {id}}}").unwrap(); + buffer + }); + + let unsigned_response = sandbox + .rest_client(QuickwitService::Indexer) + .ingest( + index_id, + IngestSource::Str(unsigned_payload.clone()), + None, + None, + CommitType::Auto, + ) + .await + .unwrap(); + + assert_eq!(unsigned_response.num_rejected_docs.unwrap(), 0); + + sandbox + .rest_client(QuickwitService::Searcher) + .indexes() + .update( + index_id, + json!({ + "version": "0.8", + "index_id": index_id, + "doc_mapping": { + "field_mappings": [ + {"name": "body", "type": "i64"} + ] + }, + "indexing_settings": { + "commit_timeout_secs": 1, + }, + }) + .to_string(), + quickwit_config::ConfigFormat::Json, + ) + .await + .unwrap(); + + let signed_payload = (-20..0).fold(String::new(), |mut buffer, id| { + writeln!(&mut buffer, "{{\"body\": {id}}}").unwrap(); + buffer + }); + + let signed_response = sandbox + .rest_client(QuickwitService::Indexer) + .ingest( + index_id, + IngestSource::Str(signed_payload.clone()), + None, + None, + CommitType::Auto, + ) + .await 
+ .unwrap(); + + assert_eq!(signed_response.num_rejected_docs.unwrap(), 0); + + sandbox.shutdown().await.unwrap(); +}