diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/date_time_type.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/date_time_type.rs
index 8460640c780..0c45044ebe6 100644
--- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/date_time_type.rs
+++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/date_time_type.rs
@@ -18,7 +18,7 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use indexmap::IndexSet;
-use quickwit_datetime::{DateTimeInputFormat, DateTimeOutputFormat};
+use quickwit_datetime::{DateTimeInputFormat, DateTimeOutputFormat, TantivyDateTime};
use serde::{Deserialize, Deserializer, Serialize};
use serde_json::Value as JsonValue;
use tantivy::schema::{DateTimePrecision, OwnedValue as TantivyValue};
@@ -98,6 +98,29 @@ impl QuickwitDateTimeOptions {
};
Ok(TantivyValue::Date(date_time))
}
+
+    /// Re-interprets an already stored tantivy value as a date time, using this
+    /// field's configured input formats.
+    ///
+    /// - `Date` values are returned unchanged.
+    /// - `Str` values are parsed with `parse_date_time_str`.
+    /// - `U64` values must fit in an `i64`; they are then parsed, like `I64`
+    ///   values, with `parse_timestamp_int`.
+    /// - `F64` values are parsed with `parse_timestamp_float`.
+    ///
+    /// Returns `None` for any other value type, or when parsing fails (the
+    /// parse error itself is discarded).
+    pub(crate) fn reparse_tantivy_value(
+        &self,
+        tantivy_value: &TantivyValue,
+    ) -> Option<TantivyDateTime> {
+        let input_formats = &self.input_formats.0;
+        match tantivy_value {
+            TantivyValue::Date(date_time) => Some(*date_time),
+            TantivyValue::Str(text) => {
+                quickwit_datetime::parse_date_time_str(text, input_formats).ok()
+            }
+            TantivyValue::U64(timestamp) => {
+                // `parse_timestamp_int` takes an `i64`: a `u64` that does not fit is
+                // rejected rather than wrapped.
+                let timestamp = i64::try_from(*timestamp).ok()?;
+                quickwit_datetime::parse_timestamp_int(timestamp, input_formats).ok()
+            }
+            TantivyValue::I64(timestamp) => {
+                quickwit_datetime::parse_timestamp_int(*timestamp, input_formats).ok()
+            }
+            TantivyValue::F64(timestamp) => {
+                quickwit_datetime::parse_timestamp_float(*timestamp, input_formats).ok()
+            }
+            _ => None,
+        }
+    }
}
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs
index 857a71669e1..702ece804cd 100644
--- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs
+++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs
@@ -31,7 +31,6 @@ use tantivy::schema::{
};
use tantivy::tokenizer::{PreTokenizedString, Token};
use tantivy::TantivyDocument as Document;
-use tracing::warn;
use super::date_time_type::QuickwitDateTimeOptions;
use super::field_mapping_entry::{NumericOutputFormat, QuickwitBoolOptions};
@@ -385,44 +384,219 @@ fn extract_json_val(
}
}
+/// Coerces a tantivy value into a JSON string, for a field mapped as text.
+///
+/// - `Str` is moved into the JSON value without copying.
+/// - `U64`, `I64`, `F64` and `Bool` use their `to_string()` representation.
+/// - `Date` is formatted with `DateTimeOutputFormat::default()`.
+/// - `IpAddr` is rendered as an IPv4 address when it is an IPv4-mapped IPv6
+///   address, and as an IPv6 address otherwise (same rendering as
+///   `tantivy_value_to_json_value_nocopy`).
+///
+/// # Errors
+///
+/// Gives the original value back when its type cannot be turned into a string,
+/// or when the date could not be formatted.
+fn value_to_string(value: TantivyValue) -> Result<JsonValue, TantivyValue> {
+    let text = match value {
+        TantivyValue::Str(s) => return Ok(JsonValue::String(s)),
+        TantivyValue::U64(number) => number.to_string(),
+        TantivyValue::I64(number) => number.to_string(),
+        TantivyValue::F64(number) => number.to_string(),
+        TantivyValue::Bool(b) => b.to_string(),
+        TantivyValue::Date(date) => {
+            return quickwit_datetime::DateTimeOutputFormat::default()
+                .format_to_json(date)
+                .map_err(|_| value);
+        }
+        // IPv4 addresses are stored as IPv4-mapped IPv6 addresses: un-map them so
+        // that the output is `1.2.3.4` and not `::ffff:1.2.3.4`.
+        TantivyValue::IpAddr(ip_v6) => match ip_v6.to_ipv4_mapped() {
+            Some(ip_v4) => ip_v4.to_string(),
+            None => ip_v6.to_string(),
+        },
+        other => return Err(other),
+    };
+    Ok(JsonValue::String(text))
+}
+
+/// Coerces a tantivy value into a JSON boolean, for a field mapped as bool.
+///
+/// - `Bool` is passed through.
+/// - `Str` must parse as a Rust `bool`.
+/// - `U64` and `I64` are accepted only when they are exactly `0` (false) or
+///   `1` (true).
+///
+/// # Errors
+///
+/// Gives the original value back when it cannot be interpreted as a boolean.
+fn value_to_bool(value: TantivyValue) -> Result<JsonValue, TantivyValue> {
+    let parsed: Option<bool> = match &value {
+        TantivyValue::Bool(b) => Some(*b),
+        TantivyValue::Str(text) => text.parse().ok(),
+        TantivyValue::U64(0) | TantivyValue::I64(0) => Some(false),
+        TantivyValue::U64(1) | TantivyValue::I64(1) => Some(true),
+        _ => None,
+    };
+    match parsed {
+        Some(b) => Ok(JsonValue::Bool(b)),
+        None => Err(value),
+    }
+}
+
+/// Coerces a tantivy value into the JSON representation of an IP address, for a
+/// field mapped as ip.
+///
+/// - `IpAddr` is passed through.
+/// - `Str` is parsed as an IPv6 address first, then as an IPv4 address which is
+///   converted to its IPv4-mapped IPv6 form.
+///
+/// The resulting address is serialized through `TantivyValue::IpAddr`, so the
+/// output format is whatever that serialization produces.
+///
+/// # Errors
+///
+/// Gives the original value back when it is neither an IP address nor a string
+/// that parses as one.
+fn value_to_ip(value: TantivyValue) -> Result<JsonValue, TantivyValue> {
+    let ip_v6_opt = match &value {
+        TantivyValue::IpAddr(ip_v6) => Some(*ip_v6),
+        TantivyValue::Str(text) => text
+            .parse::<std::net::Ipv6Addr>()
+            .or_else(|_| {
+                text.parse::<std::net::Ipv4Addr>()
+                    .map(|ip_v4| ip_v4.to_ipv6_mapped())
+            })
+            .ok(),
+        _ => None,
+    };
+    match ip_v6_opt {
+        Some(ip_v6) => Ok(serde_json::to_value(TantivyValue::IpAddr(ip_v6))
+            .expect("Json serialization should never fail.")),
+        None => Err(value),
+    }
+}
+
+/// Coerces a tantivy value into a JSON float, for a field mapped as f64.
+///
+/// - `F64` is passed through.
+/// - `U64` and `I64` are converted with an `as f64` cast.
+/// - `Str` must parse as an `f64`.
+/// - `Bool` becomes `1.0` (true) or `0.0` (false).
+///
+/// The number is then rendered with `to_json`, following
+/// `numeric_options.output_format`.
+///
+/// # Errors
+///
+/// Gives the original value back when it cannot be converted, or when
+/// `to_json` returns `None`.
+fn value_to_float(
+    value: TantivyValue,
+    numeric_options: &QuickwitNumericOptions,
+) -> Result<JsonValue, TantivyValue> {
+    let as_f64: Option<f64> = match &value {
+        TantivyValue::F64(number) => Some(*number),
+        TantivyValue::U64(number) => Some(*number as f64),
+        TantivyValue::I64(number) => Some(*number as f64),
+        TantivyValue::Str(text) => text.parse().ok(),
+        TantivyValue::Bool(b) => Some(if *b { 1.0 } else { 0.0 }),
+        _ => None,
+    };
+    as_f64
+        .and_then(|f64_val| f64_val.to_json(numeric_options.output_format))
+        .ok_or(value)
+}
+
+/// Coerces a tantivy value into a JSON unsigned integer, for a field mapped as
+/// u64.
+///
+/// - `U64` is passed through.
+/// - `I64` is accepted when it is not negative.
+/// - `F64` is accepted when it lies in `[0, 2^64)`; its fractional part is
+///   dropped by the `as u64` cast.
+/// - `Str` must parse as a `u64`.
+/// - `Bool` becomes `1` (true) or `0` (false).
+///
+/// The number is then rendered with `to_json`, following
+/// `numeric_options.output_format`.
+///
+/// # Errors
+///
+/// Gives the original value back when it cannot be converted, or when
+/// `to_json` returns `None`.
+fn value_to_u64(
+    value: TantivyValue,
+    numeric_options: &QuickwitNumericOptions,
+) -> Result<JsonValue, TantivyValue> {
+    let as_u64: Option<u64> = match &value {
+        TantivyValue::U64(number) => Some(*number),
+        TantivyValue::I64(number) => u64::try_from(*number).ok(),
+        TantivyValue::F64(number) => {
+            // `u64::MAX as f64` rounds up to 2^64, which is itself out of range: the
+            // upper bound is exclusive so that 2^64 is rejected instead of being
+            // saturated to `u64::MAX` by the `as` cast. NaN is never in the range.
+            (0.0..(u64::MAX as f64))
+                .contains(number)
+                .then(|| *number as u64)
+        }
+        TantivyValue::Str(text) => text.parse().ok(),
+        TantivyValue::Bool(b) => Some(*b as u64),
+        _ => None,
+    };
+    as_u64
+        .and_then(|u64_val| u64_val.to_json(numeric_options.output_format))
+        .ok_or(value)
+}
+
+/// Coerces a tantivy value into a JSON signed integer, for a field mapped as
+/// i64.
+///
+/// - `I64` is passed through.
+/// - `U64` is accepted when it fits in an `i64`.
+/// - `F64` is accepted when it lies in `[-2^63, 2^63)`; its fractional part is
+///   dropped by the `as i64` cast.
+/// - `Str` must parse as an `i64`.
+/// - `Bool` becomes `1` (true) or `0` (false).
+///
+/// The number is then rendered with `to_json`, following
+/// `numeric_options.output_format`.
+///
+/// # Errors
+///
+/// Gives the original value back when it cannot be converted, or when
+/// `to_json` returns `None`.
+fn value_to_i64(
+    value: TantivyValue,
+    numeric_options: &QuickwitNumericOptions,
+) -> Result<JsonValue, TantivyValue> {
+    let as_i64: Option<i64> = match &value {
+        TantivyValue::I64(number) => Some(*number),
+        TantivyValue::U64(number) => i64::try_from(*number).ok(),
+        TantivyValue::F64(number) => {
+            // `i64::MIN as f64` is exactly -2^63 and is a valid `i64`, but
+            // `i64::MAX as f64` rounds up to 2^63, which is out of range: the upper
+            // bound is exclusive so that 2^63 is rejected instead of being saturated
+            // to `i64::MAX` by the `as` cast. NaN is never in the range.
+            ((i64::MIN as f64)..(i64::MAX as f64))
+                .contains(number)
+                .then(|| *number as i64)
+        }
+        TantivyValue::Str(text) => text.parse().ok(),
+        TantivyValue::Bool(b) => Some(*b as i64),
+        _ => None,
+    };
+    as_i64
+        .and_then(|i64_val| i64_val.to_json(numeric_options.output_format))
+        .ok_or(value)
+}
+
+/// Converts a tantivy object (a list of key/value pairs) into a
+/// `serde_json` object.
+///
+/// Keys are moved as-is and each value is converted with
+/// `tantivy_value_to_json_value_nocopy`, so strings are not cloned. A new map
+/// is still allocated for the object.
+// TODO we should probably move this to tantivy, it has the opposite conversion already
+fn tantivy_object_to_json_value_nocopy(object: Vec<(String, TantivyValue)>) -> JsonValue {
+    let mut json_map = serde_json::Map::with_capacity(object.len());
+    for (key, value) in object {
+        json_map.insert(key, tantivy_value_to_json_value_nocopy(value));
+    }
+    JsonValue::Object(json_map)
+}
+
+/// Converts a tantivy value found inside a json field into a `serde_json`
+/// value, moving strings instead of cloning them.
+///
+/// Arrays and objects are converted recursively. A non-finite `f64` (NaN or
+/// infinity) has no JSON number representation and becomes `null`, which is
+/// what `serde_json` itself does when converting an `f64` into a `Value`.
+///
+/// `PreTokStr`, `Date`, `Facet` and `IpAddr` are not expected inside a json
+/// field but are still rendered as strings: the pre-tokenized text, the
+/// RFC 3339 date, the facet path, and the IPv4 (when IPv4-mapped) or IPv6
+/// address respectively.
+///
+/// # Panics
+///
+/// Panics if a date cannot be formatted, or if the value is of any other type
+/// not listed above (e.g. `Bytes`).
+fn tantivy_value_to_json_value_nocopy(value: TantivyValue) -> JsonValue {
+    match value {
+        TantivyValue::Null => JsonValue::Null,
+        TantivyValue::Str(s) => JsonValue::String(s),
+        TantivyValue::U64(number) => JsonValue::Number(number.into()),
+        TantivyValue::I64(number) => JsonValue::Number(number.into()),
+        // `from_f64` returns `None` for NaN and infinities: emit `null` rather than
+        // panicking while building a response.
+        TantivyValue::F64(f) => {
+            serde_json::Number::from_f64(f).map_or(JsonValue::Null, JsonValue::Number)
+        }
+        TantivyValue::Bool(b) => JsonValue::Bool(b),
+        TantivyValue::Array(array) => JsonValue::Array(
+            array
+                .into_iter()
+                .map(tantivy_value_to_json_value_nocopy)
+                .collect(),
+        ),
+        TantivyValue::Object(object) => tantivy_object_to_json_value_nocopy(object),
+        // we shouldn't have these types inside a json field in quickwit
+        TantivyValue::PreTokStr(pretok) => JsonValue::String(pretok.text),
+        TantivyValue::Date(date) => quickwit_datetime::DateTimeOutputFormat::Rfc3339
+            .format_to_json(date)
+            .expect("Invalid datetime is not allowed."),
+        TantivyValue::Facet(facet) => JsonValue::String(facet.to_string()),
+        // TantivyValue::Bytes(Vec<u8>) => (), // tantivy would do b64 here
+        TantivyValue::IpAddr(ip_v6) => {
+            let ip_str = match ip_v6.to_ipv4_mapped() {
+                Some(ip_v4) => ip_v4.to_string(),
+                None => ip_v6.to_string(),
+            };
+            JsonValue::String(ip_str)
+        }
+        value => unimplemented!("got unexpected type {value:?} inside json field"),
+    }
+}
+
/// Converts Tantivy::Value into Json Value.
///
/// Makes sure the type and value are consistent before converting.
/// For certain LeafType, we use the type options to format the output.
fn value_to_json(value: TantivyValue, leaf_type: &LeafType) -> Option {
- match (&value, leaf_type) {
- (TantivyValue::Str(_), LeafType::Text(_))
- | (TantivyValue::Bool(_), LeafType::Bool(_))
- | (TantivyValue::IpAddr(_), LeafType::IpAddr(_))
- | (TantivyValue::Object(_), LeafType::Json(_)) => {
- let json_value =
- serde_json::to_value(&value).expect("Json serialization should never fail.");
- Some(json_value)
- }
- (TantivyValue::Bytes(bytes), LeafType::Bytes(bytes_options)) => {
- let json_value = bytes_options.output_format.format_to_json(bytes);
- Some(json_value)
- }
- (TantivyValue::Date(date_time), LeafType::DateTime(date_time_options)) => {
- let json_value = date_time_options
- .output_format
- .format_to_json(*date_time)
- .expect("Invalid datetime is not allowed.");
- Some(json_value)
- }
- (TantivyValue::F64(f64_val), LeafType::F64(numeric_options)) => {
- f64_val.to_json(numeric_options.output_format)
- }
- (TantivyValue::I64(i64_val), LeafType::I64(numeric_options)) => {
- i64_val.to_json(numeric_options.output_format)
+ let res = match leaf_type {
+ LeafType::Text(_) => value_to_string(value),
+ LeafType::Bool(_) => value_to_bool(value),
+ LeafType::IpAddr(_) => value_to_ip(value),
+ LeafType::F64(numeric_options) => value_to_float(value, numeric_options),
+ LeafType::U64(numeric_options) => value_to_u64(value, numeric_options),
+ LeafType::I64(numeric_options) => value_to_i64(value, numeric_options),
+ LeafType::Json(_) => {
+ if let TantivyValue::Object(obj) = value {
+ // TODO do we want to allow almost everything here?
+ return Some(tantivy_object_to_json_value_nocopy(obj));
+ } else {
+ Err(value)
+ }
}
- (TantivyValue::U64(u64_val), LeafType::U64(numeric_options)) => {
- u64_val.to_json(numeric_options.output_format)
+ LeafType::Bytes(bytes_options) => {
+ if let TantivyValue::Bytes(ref bytes) = value {
+ // TODO we could cast str to bytes
+ let json_value = bytes_options.output_format.format_to_json(bytes);
+ Ok(json_value)
+ } else {
+ Err(value)
+ }
}
- _ => {
- warn!(
- "The value type `{:?}` doesn't match the requested type `{:?}`",
- value, leaf_type
+ LeafType::DateTime(date_time_options) => date_time_options
+ .reparse_tantivy_value(&value)
+ .map(|date_time| {
+ date_time_options
+ .output_format
+ .format_to_json(date_time)
+ .expect("Invalid datetime is not allowed.")
+ })
+ .ok_or(value),
+ };
+ match res {
+ Ok(res) => Some(res),
+ Err(value) => {
+ quickwit_common::rate_limited_warn!(
+ limit_per_min = 2,
+ "the value type `{:?}` doesn't match the requested type `{:?}`",
+ value,
+ leaf_type
);
None
}
@@ -1601,7 +1775,23 @@ mod tests {
assert_eq!(
value_to_json(
TantivyValue::F64(0.1),
- &LeafType::F64(numeric_options_number)
+ &LeafType::F64(numeric_options_number.clone())
+ )
+ .unwrap(),
+ serde_json::json!(0.1)
+ );
+ assert_eq!(
+ value_to_json(
+ TantivyValue::U64(1),
+ &LeafType::F64(numeric_options_number.clone())
+ )
+ .unwrap(),
+ serde_json::json!(1.0)
+ );
+ assert_eq!(
+ value_to_json(
+ TantivyValue::Str("0.1".to_string()),
+ &LeafType::F64(numeric_options_number.clone())
)
.unwrap(),
serde_json::json!(0.1)
@@ -1623,11 +1813,15 @@ mod tests {
assert_eq!(
value_to_json(
TantivyValue::I64(-1),
- &LeafType::I64(numeric_options_number)
+ &LeafType::I64(numeric_options_number.clone())
)
.unwrap(),
serde_json::json!(-1)
);
+ assert_eq!(
+ value_to_json(TantivyValue::I64(1), &LeafType::I64(numeric_options_number)).unwrap(),
+ serde_json::json!(1)
+ );
let numeric_options_str = QuickwitNumericOptions {
output_format: NumericOutputFormat::String,
@@ -1643,7 +1837,15 @@ mod tests {
fn test_tantivy_value_to_json_value_u64() {
let numeric_options_number = QuickwitNumericOptions::default();
assert_eq!(
- value_to_json(TantivyValue::U64(1), &LeafType::U64(numeric_options_number)).unwrap(),
+ value_to_json(
+ TantivyValue::U64(1),
+ &LeafType::U64(numeric_options_number.clone())
+ )
+ .unwrap(),
+ serde_json::json!(1u64)
+ );
+ assert_eq!(
+ value_to_json(TantivyValue::I64(1), &LeafType::U64(numeric_options_number)).unwrap(),
serde_json::json!(1u64)
);