diff --git a/common/src/lib.rs b/common/src/lib.rs index 0a51f91fe3..cc4db87671 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -20,7 +20,7 @@ pub use datetime::{DateTime, DateTimePrecision}; pub use group_by::GroupByIteratorExtended; pub use json_path_writer::JsonPathWriter; pub use ownedbytes::{OwnedBytes, StableDeref}; -pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize}; +pub use serialize::*; pub use vint::{ read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, VIntU128, }; diff --git a/common/src/serialize.rs b/common/src/serialize.rs index 181d61e54c..c94521d8f4 100644 --- a/common/src/serialize.rs +++ b/common/src/serialize.rs @@ -74,14 +74,14 @@ impl FixedSize for () { impl BinarySerializable for Vec { fn serialize(&self, writer: &mut W) -> io::Result<()> { - VInt(self.len() as u64).serialize(writer)?; + BinarySerializable::serialize(&VInt(self.len() as u64), writer)?; for it in self { it.serialize(writer)?; } Ok(()) } fn deserialize(reader: &mut R) -> io::Result> { - let num_items = VInt::deserialize(reader)?.val(); + let num_items = ::deserialize(reader)?.val(); let mut items: Vec = Vec::with_capacity(num_items as usize); for _ in 0..num_items { let item = T::deserialize(reader)?; @@ -236,12 +236,12 @@ impl FixedSize for bool { impl BinarySerializable for String { fn serialize(&self, writer: &mut W) -> io::Result<()> { let data: &[u8] = self.as_bytes(); - VInt(data.len() as u64).serialize(writer)?; + BinarySerializable::serialize(&VInt(data.len() as u64), writer)?; writer.write_all(data) } fn deserialize(reader: &mut R) -> io::Result { - let string_length = VInt::deserialize(reader)?.val() as usize; + let string_length = ::deserialize(reader)?.val() as usize; let mut result = String::with_capacity(string_length); reader .take(string_length as u64) @@ -253,12 +253,12 @@ impl BinarySerializable for String { impl<'a> BinarySerializable for Cow<'a, str> { fn serialize(&self, writer: &mut W) -> 
io::Result<()> { let data: &[u8] = self.as_bytes(); - VInt(data.len() as u64).serialize(writer)?; + BinarySerializable::serialize(&VInt(data.len() as u64), writer)?; writer.write_all(data) } fn deserialize(reader: &mut R) -> io::Result> { - let string_length = VInt::deserialize(reader)?.val() as usize; + let string_length = ::deserialize(reader)?.val() as usize; let mut result = String::with_capacity(string_length); reader .take(string_length as u64) @@ -269,18 +269,18 @@ impl<'a> BinarySerializable for Cow<'a, str> { impl<'a> BinarySerializable for Cow<'a, [u8]> { fn serialize(&self, writer: &mut W) -> io::Result<()> { - VInt(self.len() as u64).serialize(writer)?; + BinarySerializable::serialize(&VInt(self.len() as u64), writer)?; for it in self.iter() { - it.serialize(writer)?; + BinarySerializable::serialize(it, writer)?; } Ok(()) } fn deserialize(reader: &mut R) -> io::Result> { - let num_items = VInt::deserialize(reader)?.val(); + let num_items = ::deserialize(reader)?.val(); let mut items: Vec = Vec::with_capacity(num_items as usize); for _ in 0..num_items { - let item = u8::deserialize(reader)?; + let item = ::deserialize(reader)?; items.push(item); } Ok(Cow::Owned(items)) diff --git a/src/compat_tests.rs b/src/compat_tests.rs index 6e75c5de2a..e77e94527b 100644 --- a/src/compat_tests.rs +++ b/src/compat_tests.rs @@ -44,8 +44,19 @@ fn test_format_6() { assert_date_time_precision(&index, DateTimePrecision::Microseconds); } +/// feature flag quickwit uses a different dictionary type +#[test] +#[cfg(not(feature = "quickwit"))] +fn test_format_7() { + let path = path_for_version("7"); + + let index = Index::open_in_dir(path).expect("Failed to open index"); + // dates are not truncated in v7 in the docstore + assert_date_time_precision(&index, DateTimePrecision::Nanoseconds); +} + #[cfg(not(feature = "quickwit"))] -fn assert_date_time_precision(index: &Index, precision: DateTimePrecision) { +fn assert_date_time_precision(index: &Index, doc_store_precision: 
DateTimePrecision) { use collector::TopDocs; let reader = index.reader().expect("Failed to create reader"); let searcher = reader.searcher(); @@ -75,6 +86,6 @@ fn assert_date_time_precision(index: &Index, precision: DateTimePrecision) { .as_datetime() .unwrap(); - let expected = DateTime::from_timestamp_nanos(123456).truncate(precision); + let expected = DateTime::from_timestamp_nanos(123456).truncate(doc_store_precision); assert_eq!(date_value, expected,); } diff --git a/src/lib.rs b/src/lib.rs index c34131e0a3..df6ba8d3ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -232,7 +232,7 @@ pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter}; pub use crate::schema::{Document, TantivyDocument, Term}; /// Index format version. -pub const INDEX_FORMAT_VERSION: u32 = 6; +pub const INDEX_FORMAT_VERSION: u32 = 7; /// Oldest index format version this tantivy version can read. pub const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4; diff --git a/src/schema/document/de.rs b/src/schema/document/de.rs index e80bff2c9e..01ab0afdc0 100644 --- a/src/schema/document/de.rs +++ b/src/schema/document/de.rs @@ -22,6 +22,7 @@ use super::se::BinaryObjectSerializer; use super::{OwnedValue, Value}; use crate::schema::document::type_codes; use crate::schema::{Facet, Field}; +use crate::store::DocStoreVersion; use crate::tokenizer::PreTokenizedString; #[derive(Debug, thiserror::Error, Clone)] @@ -45,6 +46,9 @@ pub enum DeserializeError { #[error("{0}")] /// A custom error message. Custom(String), + #[error("Version {0}, Max version supported: {1}")] + /// Unsupported version error. + UnsupportedVersion(u32, u32), } impl DeserializeError { @@ -291,6 +295,7 @@ pub trait ObjectAccess<'de> { pub struct BinaryDocumentDeserializer<'de, R> { length: usize, position: usize, + doc_store_version: DocStoreVersion, reader: &'de mut R, } @@ -298,12 +303,16 @@ impl<'de, R> BinaryDocumentDeserializer<'de, R> where R: Read { /// Attempts to create a new document deserializer from a given reader. 
- pub(crate) fn from_reader(reader: &'de mut R) -> Result { + pub(crate) fn from_reader( + reader: &'de mut R, + doc_store_version: DocStoreVersion, + ) -> Result { let length = VInt::deserialize(reader)?; Ok(Self { length: length.val() as usize, position: 0, + doc_store_version, reader, }) } @@ -329,8 +338,8 @@ where R: Read } let field = Field::deserialize(self.reader).map_err(DeserializeError::from)?; - - let deserializer = BinaryValueDeserializer::from_reader(self.reader)?; + let deserializer = + BinaryValueDeserializer::from_reader(self.reader, self.doc_store_version)?; let value = V::deserialize(deserializer)?; self.position += 1; @@ -344,13 +353,17 @@ where R: Read pub struct BinaryValueDeserializer<'de, R> { value_type: ValueType, reader: &'de mut R, + doc_store_version: DocStoreVersion, } impl<'de, R> BinaryValueDeserializer<'de, R> where R: Read { /// Attempts to create a new value deserializer from a given reader. - fn from_reader(reader: &'de mut R) -> Result { + fn from_reader( + reader: &'de mut R, + doc_store_version: DocStoreVersion, + ) -> Result { let type_code = ::deserialize(reader)?; let value_type = match type_code { @@ -391,7 +404,11 @@ where R: Read } }; - Ok(Self { value_type, reader }) + Ok(Self { + value_type, + reader, + doc_store_version, + }) } fn validate_type(&self, expected_type: ValueType) -> Result<(), DeserializeError> { @@ -438,7 +455,16 @@ where R: Read fn deserialize_datetime(self) -> Result { self.validate_type(ValueType::DateTime)?; - ::deserialize(self.reader).map_err(DeserializeError::from) + match self.doc_store_version { + DocStoreVersion::V1 => { + let timestamp_micros = ::deserialize(self.reader)?; + Ok(DateTime::from_timestamp_micros(timestamp_micros)) + } + DocStoreVersion::V2 => { + let timestamp_nanos = ::deserialize(self.reader)?; + Ok(DateTime::from_timestamp_nanos(timestamp_nanos)) + } + } } fn deserialize_facet(self) -> Result { @@ -514,11 +540,13 @@ where R: Read visitor.visit_pre_tokenized_string(val) } 
ValueType::Array => { - let access = BinaryArrayDeserializer::from_reader(self.reader)?; + let access = + BinaryArrayDeserializer::from_reader(self.reader, self.doc_store_version)?; visitor.visit_array(access) } ValueType::Object => { - let access = BinaryObjectDeserializer::from_reader(self.reader)?; + let access = + BinaryObjectDeserializer::from_reader(self.reader, self.doc_store_version)?; visitor.visit_object(access) } #[allow(deprecated)] @@ -537,7 +565,8 @@ where R: Read let out_rc = std::rc::Rc::new(out); let mut slice: &[u8] = &out_rc; - let access = BinaryObjectDeserializer::from_reader(&mut slice)?; + let access = + BinaryObjectDeserializer::from_reader(&mut slice, self.doc_store_version)?; visitor.visit_object(access) } @@ -551,19 +580,24 @@ pub struct BinaryArrayDeserializer<'de, R> { length: usize, position: usize, reader: &'de mut R, + doc_store_version: DocStoreVersion, } impl<'de, R> BinaryArrayDeserializer<'de, R> where R: Read { /// Attempts to create a new array deserializer from a given reader. - fn from_reader(reader: &'de mut R) -> Result { + fn from_reader( + reader: &'de mut R, + doc_store_version: DocStoreVersion, + ) -> Result { let length = ::deserialize(reader)?; Ok(Self { length: length.val() as usize, position: 0, reader, + doc_store_version, }) } @@ -587,7 +621,8 @@ where R: Read return Ok(None); } - let deserializer = BinaryValueDeserializer::from_reader(self.reader)?; + let deserializer = + BinaryValueDeserializer::from_reader(self.reader, self.doc_store_version)?; let value = V::deserialize(deserializer)?; // Advance the position cursor. @@ -610,8 +645,11 @@ impl<'de, R> BinaryObjectDeserializer<'de, R> where R: Read { /// Attempts to create a new object deserializer from a given reader. 
- fn from_reader(reader: &'de mut R) -> Result { - let inner = BinaryArrayDeserializer::from_reader(reader)?; + fn from_reader( + reader: &'de mut R, + doc_store_version: DocStoreVersion, + ) -> Result { + let inner = BinaryArrayDeserializer::from_reader(reader, doc_store_version)?; Ok(Self { inner }) } } @@ -819,6 +857,7 @@ mod tests { use crate::schema::document::existing_type_impls::JsonObjectIter; use crate::schema::document::se::BinaryValueSerializer; use crate::schema::document::{ReferenceValue, ReferenceValueLeaf}; + use crate::store::DOC_STORE_VERSION; fn serialize_value<'a>(value: ReferenceValue<'a, &'a serde_json::Value>) -> Vec { let mut writer = Vec::new(); @@ -829,9 +868,19 @@ mod tests { writer } + fn serialize_owned_value<'a>(value: ReferenceValue<'a, &'a OwnedValue>) -> Vec { + let mut writer = Vec::new(); + + let mut serializer = BinaryValueSerializer::new(&mut writer); + serializer.serialize_value(value).expect("Serialize value"); + + writer + } + fn deserialize_value(buffer: Vec) -> crate::schema::OwnedValue { let mut cursor = Cursor::new(buffer); - let deserializer = BinaryValueDeserializer::from_reader(&mut cursor).unwrap(); + let deserializer = + BinaryValueDeserializer::from_reader(&mut cursor, DOC_STORE_VERSION).unwrap(); crate::schema::OwnedValue::deserialize(deserializer).expect("Deserialize value") } @@ -1010,6 +1059,17 @@ mod tests { assert_eq!(value, expected_val); } + #[test] + fn test_nested_date_precision() { + let object = OwnedValue::Object(vec![( + "my-date".into(), + OwnedValue::Date(DateTime::from_timestamp_nanos(323456)), + )]); + let result = serialize_owned_value((&object).as_value()); + let value = deserialize_value(result); + assert_eq!(value, object); + } + #[test] fn test_nested_serialize() { let mut object = serde_json::Map::new(); diff --git a/src/schema/document/se.rs b/src/schema/document/se.rs index edc8399b6e..9ad5003b70 100644 --- a/src/schema/document/se.rs +++ b/src/schema/document/se.rs @@ -81,6 +81,15 @@ where 
W: Write Self { writer } } + fn serialize_with_type_code( + &mut self, + code: u8, + val: &T, + ) -> io::Result<()> { + self.write_type_code(code)?; + BinarySerializable::serialize(val, self.writer) + } + /// Attempts to serialize a given value and write the output /// to the writer. pub(crate) fn serialize_value<'a, V>( @@ -94,56 +103,38 @@ where W: Write ReferenceValue::Leaf(leaf) => match leaf { ReferenceValueLeaf::Null => self.write_type_code(type_codes::NULL_CODE), ReferenceValueLeaf::Str(val) => { - self.write_type_code(type_codes::TEXT_CODE)?; - - let temp_val = Cow::Borrowed(val); - temp_val.serialize(self.writer) + self.serialize_with_type_code(type_codes::TEXT_CODE, &Cow::Borrowed(val)) } ReferenceValueLeaf::U64(val) => { - self.write_type_code(type_codes::U64_CODE)?; - - val.serialize(self.writer) + self.serialize_with_type_code(type_codes::U64_CODE, &val) } ReferenceValueLeaf::I64(val) => { - self.write_type_code(type_codes::I64_CODE)?; - - val.serialize(self.writer) + self.serialize_with_type_code(type_codes::I64_CODE, &val) } ReferenceValueLeaf::F64(val) => { - self.write_type_code(type_codes::F64_CODE)?; - - f64_to_u64(val).serialize(self.writer) + self.serialize_with_type_code(type_codes::F64_CODE, &f64_to_u64(val)) } ReferenceValueLeaf::Date(val) => { self.write_type_code(type_codes::DATE_CODE)?; - val.serialize(self.writer) - } - ReferenceValueLeaf::Facet(val) => { - self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?; - - Cow::Borrowed(val).serialize(self.writer) + let timestamp_nanos: i64 = val.into_timestamp_nanos(); + BinarySerializable::serialize(&timestamp_nanos, self.writer) + } + ReferenceValueLeaf::Facet(val) => self.serialize_with_type_code( + type_codes::HIERARCHICAL_FACET_CODE, + &Cow::Borrowed(val), + ), ReferenceValueLeaf::Bytes(val) => { - self.write_type_code(type_codes::BYTES_CODE)?; - - let temp_val = Cow::Borrowed(val); - temp_val.serialize(self.writer) + self.serialize_with_type_code(type_codes::BYTES_CODE, &Cow::Borrowed(val)) 
} ReferenceValueLeaf::IpAddr(val) => { - self.write_type_code(type_codes::IP_CODE)?; - - val.to_u128().serialize(self.writer) + self.serialize_with_type_code(type_codes::IP_CODE, &val.to_u128()) } ReferenceValueLeaf::Bool(val) => { - self.write_type_code(type_codes::BOOL_CODE)?; - - val.serialize(self.writer) + self.serialize_with_type_code(type_codes::BOOL_CODE, &val) } ReferenceValueLeaf::PreTokStr(val) => { self.write_type_code(type_codes::EXT_CODE)?; - self.write_type_code(type_codes::TOK_STR_EXT_CODE)?; - - val.serialize(self.writer) + self.serialize_with_type_code(type_codes::TOK_STR_EXT_CODE, &*val) } }, ReferenceValue::Array(elements) => { @@ -306,7 +297,6 @@ where W: Write mod tests { use std::collections::BTreeMap; - use common::DateTime; use serde_json::Number; use tokenizer_api::Token; @@ -337,7 +327,10 @@ mod tests { $ext_code.serialize(&mut writer).unwrap(); )? - $value.serialize(&mut writer).unwrap(); + BinarySerializable::serialize( + &$value, + &mut writer, + ).unwrap(); )* writer @@ -355,7 +348,10 @@ mod tests { $ext_code.serialize(&mut writer).unwrap(); )? 
- $value.serialize(&mut writer).unwrap(); + BinarySerializable::serialize( + &$value, + &mut writer, + ).unwrap(); )* writer @@ -418,15 +414,6 @@ mod tests { "Expected serialized value to match the binary representation" ); - let result = serialize_value(ReferenceValueLeaf::Date(DateTime::MAX).into()); - let expected = binary_repr!( - type_codes::DATE_CODE => DateTime::MAX, - ); - assert_eq!( - result, expected, - "Expected serialized value to match the binary representation" - ); - let facet = Facet::from_text("/hello/world").unwrap(); let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into()); let expected = binary_repr!( diff --git a/src/schema/facet.rs b/src/schema/facet.rs index 275b9cb904..64b5981832 100644 --- a/src/schema/facet.rs +++ b/src/schema/facet.rs @@ -4,7 +4,7 @@ use std::io::{self, Read, Write}; use std::str; use std::string::FromUtf8Error; -use common::BinarySerializable; +use common::*; use once_cell::sync::Lazy; use regex::Regex; use serde::de::Error as _; diff --git a/src/store/footer.rs b/src/store/footer.rs index 3505a55e0e..b4cc65a201 100644 --- a/src/store/footer.rs +++ b/src/store/footer.rs @@ -2,12 +2,13 @@ use std::io; use common::{BinarySerializable, FixedSize, HasLen}; -use super::{Decompressor, DOC_STORE_VERSION}; +use super::{Decompressor, DocStoreVersion, DOC_STORE_VERSION}; use crate::directory::FileSlice; #[derive(Debug, Clone, PartialEq)] pub struct DocStoreFooter { pub offset: u64, + pub doc_store_version: DocStoreVersion, pub decompressor: Decompressor, } @@ -25,9 +26,11 @@ impl BinarySerializable for DocStoreFooter { } fn deserialize(reader: &mut R) -> io::Result { - let doc_store_version = u32::deserialize(reader)?; - if doc_store_version != DOC_STORE_VERSION { - panic!("actual doc store version: {doc_store_version}, expected: {DOC_STORE_VERSION}"); + let doc_store_version = DocStoreVersion::deserialize(reader)?; + if doc_store_version > DOC_STORE_VERSION { + panic!( + "actual doc store version: 
{doc_store_version}, max_supported: {DOC_STORE_VERSION}" + ); } let offset = u64::deserialize(reader)?; let compressor_id = u8::deserialize(reader)?; @@ -35,6 +38,7 @@ impl BinarySerializable for DocStoreFooter { reader.read_exact(&mut skip_buf)?; Ok(DocStoreFooter { offset, + doc_store_version, decompressor: Decompressor::from_id(compressor_id), }) } @@ -45,9 +49,14 @@ impl FixedSize for DocStoreFooter { } impl DocStoreFooter { - pub fn new(offset: u64, decompressor: Decompressor) -> Self { + pub fn new( + offset: u64, + decompressor: Decompressor, + doc_store_version: DocStoreVersion, + ) -> Self { DocStoreFooter { offset, + doc_store_version, decompressor, } } diff --git a/src/store/mod.rs b/src/store/mod.rs index 63327f0739..2a960ff1c0 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -35,15 +35,16 @@ mod footer; mod index; mod reader; mod writer; + pub use self::compressors::{Compressor, ZstdCompressor}; pub use self::decompressors::Decompressor; -pub(crate) use self::reader::DOCSTORE_CACHE_CAPACITY; pub use self::reader::{CacheStats, StoreReader}; +pub(crate) use self::reader::{DocStoreVersion, DOCSTORE_CACHE_CAPACITY}; pub use self::writer::StoreWriter; mod store_compressor; /// Doc store version in footer to handle format changes. -pub(crate) const DOC_STORE_VERSION: u32 = 1; +pub(crate) const DOC_STORE_VERSION: DocStoreVersion = DocStoreVersion::V2; #[cfg(feature = "lz4-compression")] mod compression_lz4_block; diff --git a/src/store/reader.rs b/src/store/reader.rs index dd88b776f7..21e101dab0 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -1,3 +1,4 @@ +use std::fmt::Display; use std::io; use std::iter::Sum; use std::num::NonZeroUsize; @@ -25,9 +26,43 @@ pub(crate) const DOCSTORE_CACHE_CAPACITY: usize = 100; type Block = OwnedBytes; +/// The format version of the document store. 
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub(crate) enum DocStoreVersion { + V1 = 1, + V2 = 2, +} +impl Display for DocStoreVersion { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DocStoreVersion::V1 => write!(f, "V1"), + DocStoreVersion::V2 => write!(f, "V2"), + } + } +} +impl BinarySerializable for DocStoreVersion { + fn serialize(&self, writer: &mut W) -> io::Result<()> { + (*self as u32).serialize(writer) + } + + fn deserialize(reader: &mut R) -> io::Result { + Ok(match u32::deserialize(reader)? { + 1 => DocStoreVersion::V1, + 2 => DocStoreVersion::V2, + v => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Invalid doc store version {}", v), + )) + } + }) + } +} + /// Reads document off tantivy's [`Store`](./index.html) pub struct StoreReader { decompressor: Decompressor, + doc_store_version: DocStoreVersion, data: FileSlice, skip_index: Arc, space_usage: StoreSpaceUsage, @@ -129,6 +164,7 @@ impl StoreReader { let skip_index = SkipIndex::open(index_data); Ok(StoreReader { decompressor: footer.decompressor, + doc_store_version: footer.doc_store_version, data: data_file, cache: BlockCache { cache: NonZeroUsize::new(cache_num_blocks) @@ -203,8 +239,9 @@ impl StoreReader { pub fn get(&self, doc_id: DocId) -> crate::Result { let mut doc_bytes = self.get_document_bytes(doc_id)?; - let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes) - .map_err(crate::TantivyError::from)?; + let deserializer = + BinaryDocumentDeserializer::from_reader(&mut doc_bytes, self.doc_store_version) + .map_err(crate::TantivyError::from)?; D::deserialize(deserializer).map_err(crate::TantivyError::from) } @@ -244,8 +281,9 @@ impl StoreReader { self.iter_raw(alive_bitset).map(|doc_bytes_res| { let mut doc_bytes = doc_bytes_res?; - let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes) - .map_err(crate::TantivyError::from)?; + let deserializer = + 
BinaryDocumentDeserializer::from_reader(&mut doc_bytes, self.doc_store_version) + .map_err(crate::TantivyError::from)?; D::deserialize(deserializer).map_err(crate::TantivyError::from) }) } @@ -391,8 +429,9 @@ impl StoreReader { ) -> crate::Result { let mut doc_bytes = self.get_document_bytes_async(doc_id, executor).await?; - let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes) - .map_err(crate::TantivyError::from)?; + let deserializer = + BinaryDocumentDeserializer::from_reader(&mut doc_bytes, self.doc_store_version) + .map_err(crate::TantivyError::from)?; D::deserialize(deserializer).map_err(crate::TantivyError::from) } } @@ -414,6 +453,11 @@ mod tests { doc.get_first(*field).and_then(|f| f.as_value().as_str()) } + #[test] + fn test_doc_store_version_ord() { + assert!(DocStoreVersion::V1 < DocStoreVersion::V2); + } + #[test] fn test_store_lru_cache() -> crate::Result<()> { let directory = RamDirectory::create(); diff --git a/src/store/store_compressor.rs b/src/store/store_compressor.rs index c528790480..ca9f107e6c 100644 --- a/src/store/store_compressor.rs +++ b/src/store/store_compressor.rs @@ -5,6 +5,7 @@ use std::{io, thread}; use common::{BinarySerializable, CountingWriter, TerminatingWrite}; +use super::DOC_STORE_VERSION; use crate::directory::WritePtr; use crate::store::footer::DocStoreFooter; use crate::store::index::{Checkpoint, SkipIndexBuilder}; @@ -143,8 +144,11 @@ impl BlockCompressorImpl { fn close(mut self) -> io::Result<()> { let header_offset: u64 = self.writer.written_bytes(); - let docstore_footer = - DocStoreFooter::new(header_offset, Decompressor::from(self.compressor)); + let docstore_footer = DocStoreFooter::new( + header_offset, + Decompressor::from(self.compressor), + DOC_STORE_VERSION, + ); self.offset_index_writer.serialize_into(&mut self.writer)?; docstore_footer.serialize(&mut self.writer)?; self.writer.terminate() diff --git a/src/tokenizer/tokenized_string.rs b/src/tokenizer/tokenized_string.rs index 
8fbf51f8c5..e5d67cd9bf 100644 --- a/src/tokenizer/tokenized_string.rs +++ b/src/tokenizer/tokenized_string.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use std::io; use std::io::{Read, Write}; -use common::BinarySerializable; +use common::*; use crate::tokenizer::{Token, TokenStream}; diff --git a/tests/compat_tests_data/index_v7/.managed.json b/tests/compat_tests_data/index_v7/.managed.json new file mode 100644 index 0000000000..ab0d201e66 --- /dev/null +++ b/tests/compat_tests_data/index_v7/.managed.json @@ -0,0 +1 @@ +["meta.json","000002f0000000000000000000000000.fieldnorm","000002f0000000000000000000000000.pos","000002f0000000000000000000000000.store","000002f0000000000000000000000000.term","000002f0000000000000000000000000.fast","000002f0000000000000000000000000.idx"] diff --git a/tests/compat_tests_data/index_v7/.tantivy-meta.lock b/tests/compat_tests_data/index_v7/.tantivy-meta.lock new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/compat_tests_data/index_v7/.tantivy-writer.lock b/tests/compat_tests_data/index_v7/.tantivy-writer.lock new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.fast b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.fast new file mode 100644 index 0000000000..8bd1d4318d Binary files /dev/null and b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.fast differ diff --git a/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.fieldnorm b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.fieldnorm new file mode 100644 index 0000000000..49005c2eb9 Binary files /dev/null and b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.fieldnorm differ diff --git a/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.idx b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.idx new file mode 100644 index 0000000000..6d29a6b57f Binary files /dev/null and 
b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.idx differ diff --git a/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.pos b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.pos new file mode 100644 index 0000000000..a50c38d543 Binary files /dev/null and b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.pos differ diff --git a/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.store b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.store new file mode 100644 index 0000000000..8dd320ce90 Binary files /dev/null and b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.store differ diff --git a/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.term b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.term new file mode 100644 index 0000000000..f1a6530979 Binary files /dev/null and b/tests/compat_tests_data/index_v7/000002f0000000000000000000000000.term differ diff --git a/tests/compat_tests_data/index_v7/meta.json b/tests/compat_tests_data/index_v7/meta.json new file mode 100644 index 0000000000..3304b1d52c --- /dev/null +++ b/tests/compat_tests_data/index_v7/meta.json @@ -0,0 +1,40 @@ +{ + "index_settings": { + "docstore_compression": "lz4", + "docstore_blocksize": 16384 + }, + "segments": [ + { + "segment_id": "000002f0-0000-0000-0000-000000000000", + "max_doc": 1, + "deletes": null + } + ], + "schema": [ + { + "name": "label", + "type": "text", + "options": { + "indexing": { + "record": "position", + "fieldnorms": true, + "tokenizer": "default" + }, + "stored": true, + "fast": false + } + }, + { + "name": "date", + "type": "date", + "options": { + "indexed": true, + "fieldnorms": true, + "fast": false, + "stored": true, + "precision": "seconds" + } + } + ], + "opstamp": 2 +}