diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs index 70935397f42..6f2e44794be 100644 --- a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs @@ -256,8 +256,8 @@ mod tests { use crate::default_doc_mapper::{FieldMappingType, QuickwitJsonOptions}; use crate::{ - Cardinality, DefaultDocMapperBuilder, DocMapper, DocParsingError, FieldMappingEntry, - TermRange, WarmupInfo, DYNAMIC_FIELD_NAME, + Cardinality, DefaultDocMapper, DefaultDocMapperBuilder, DocMapper, DocParsingError, + FieldMappingEntry, TermRange, WarmupInfo, DYNAMIC_FIELD_NAME, }; const JSON_DEFAULT_DOC_MAPPER: &str = r#" @@ -396,6 +396,130 @@ mod tests { ); } + #[test] + fn test_validate_doc() { + const JSON_CONFIG_VALUE: &str = r#"{ + "timestamp_field": "timestamp", + "field_mappings": [ + { + "name": "timestamp", + "type": "datetime", + "fast": true + }, + { + "name": "body", + "type": "text" + }, + { + "name": "response_date", + "type": "datetime", + "input_formats": ["rfc3339", "unix_timestamp"] + }, + { + "name": "response_time", + "type": "f64" + }, + { + "name": "response_time_no_coercion", + "type": "f64", + "coerce": false + }, + { + "name": "response_payload", + "type": "bytes" + }, + { + "name": "is_important", + "type": "bool" + }, + { + "name": "properties", + "type": "json" + }, + { + "name": "attributes", + "type": "object", + "field_mappings": [ + { + "name": "numbers", + "type": "array<i64>" + } + ] + }] + }"#; + let doc_mapper = serde_json::from_str::<DefaultDocMapper>(JSON_CONFIG_VALUE).unwrap(); + { + let valid_doc_value = serde_json::json!({ "body": "toto" }); + let valid_doc_json = valid_doc_value.as_object().unwrap(); + assert!(doc_mapper.validate_json_obj(valid_doc_json).is_ok()); + } + { + let valid_doc_value = serde_json::json!({ "response_time": "toto" }); + let valid_doc_json = valid_doc_value.as_object().unwrap(); + assert!(matches!( 
doc_mapper.validate_json_obj(valid_doc_json).unwrap_err(), + DocParsingError::ValueError(_, _) + )); + } + { + // coercion is supported + let valid_doc_value = serde_json::json!({ "response_time": "2.3" }); + let valid_doc_json = valid_doc_value.as_object().unwrap(); + assert!(doc_mapper.validate_json_obj(valid_doc_json).is_ok()); + } + { + // coercion disabled + let valid_doc_value = serde_json::json!({ "response_time_no_coercion": "2.3" }); + let valid_doc_json = valid_doc_value.as_object().unwrap(); + assert!(matches!( + doc_mapper.validate_json_obj(valid_doc_json).unwrap_err(), + DocParsingError::ValueError(_, _) + )); + } + { + // an array value is rejected for a single-valued field + let valid_doc_value = serde_json::json!({ "response_time": [2.3] }); + let valid_doc_json = valid_doc_value.as_object().unwrap(); + assert!(matches!( + doc_mapper.validate_json_obj(valid_doc_json).unwrap_err(), + DocParsingError::MultiValuesNotSupported(_) + )); + } + { + let valid_doc_value = serde_json::json!({ "attributes": { "numbers": [-2] }}); + let valid_doc_json = valid_doc_value.as_object().unwrap(); + assert!(doc_mapper.validate_json_obj(valid_doc_json).is_ok()); + } + } + + #[test] + fn test_validate_doc_mode() { + { + const JSON_CONFIG_VALUE: &str = r#"{ "mode": "strict", "field_mappings": [] }"#; + let doc_mapper = serde_json::from_str::<DefaultDocMapper>(JSON_CONFIG_VALUE).unwrap(); + let valid_doc_value = serde_json::json!({ "response_time": "toto" }); + let valid_doc_json = valid_doc_value.as_object().unwrap(); + assert!(matches!( + doc_mapper.validate_json_obj(valid_doc_json).unwrap_err(), + DocParsingError::NoSuchFieldInSchema(_) + )); + } + { + const JSON_CONFIG_VALUE: &str = r#"{ "mode": "lenient", "field_mappings": [] }"#; + let doc_mapper = serde_json::from_str::<DefaultDocMapper>(JSON_CONFIG_VALUE).unwrap(); + let valid_doc_value = serde_json::json!({ "response_time": "toto" }); + let valid_doc_json = valid_doc_value.as_object().unwrap(); + assert!(doc_mapper.validate_json_obj(valid_doc_json).is_ok()); + } + { + const 
JSON_CONFIG_VALUE: &str = r#"{ "mode": "dynamic", "field_mappings": [] }"#; + let doc_mapper = serde_json::from_str::<DefaultDocMapper>(JSON_CONFIG_VALUE).unwrap(); + let valid_doc_value = serde_json::json!({ "response_time": "toto" }); + let valid_doc_json = valid_doc_value.as_object().unwrap(); + assert!(doc_mapper.validate_json_obj(valid_doc_json).is_ok()); + } + } + fn hashset(elements: &[&str]) -> HashSet<String> { elements.iter().map(|elem| elem.to_string()).collect() } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/doc_mapper.rs b/quickwit/quickwit-ingest/src/ingest_v2/doc_mapper.rs index a768a312cba..2ae098e6937 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/doc_mapper.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/doc_mapper.rs @@ -204,8 +204,8 @@ mod tests { assert!(contains_message_field); } - #[tokio::test] - async fn test_validate_doc_batch() { + #[test] + fn test_validate_doc_batch() { let doc_mapping_json = r#"{ "mode": "strict", "field_mappings": [ @@ -218,14 +218,12 @@ mod tests { let doc_mapper = try_build_doc_mapper(doc_mapping_json).unwrap(); let doc_batch = DocBatchV2::default(); - let (_, parse_failures) = validate_doc_batch(doc_batch, doc_mapper.clone()) - .await - .unwrap(); + let (_, parse_failures) = validate_doc_batch_impl(doc_batch, doc_mapper.clone()); assert_eq!(parse_failures.len(), 0); let doc_batch = DocBatchV2::for_test(["", "[]", r#"{"foo": "bar"}"#, r#"{"doc": "test-doc-000"}"#]); - let (_, parse_failures) = validate_doc_batch(doc_batch, doc_mapper).await.unwrap(); + let (_, parse_failures) = validate_doc_batch_impl(doc_batch, doc_mapper); assert_eq!(parse_failures.len(), 3); let parse_failure_0 = &parse_failures[0];