From 528665ba15864e4b809c5e3ad2f47b1ad613b887 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 6 Oct 2023 15:14:01 +0800 Subject: [PATCH 1/2] remove Document: DocumentDeserialize dependency The dependency requires users to implement an API they may not use. --- src/core/searcher.rs | 12 +++++++++--- src/schema/document/mod.rs | 2 +- src/store/reader.rs | 11 +++++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/core/searcher.rs b/src/core/searcher.rs index 23cf8123fa..2ee60b9dc0 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -5,7 +5,7 @@ use std::{fmt, io}; use crate::collector::Collector; use crate::core::{Executor, SegmentReader}; use crate::query::{Bm25StatisticsProvider, EnableScoring, Query}; -use crate::schema::document::Document; +use crate::schema::document::{Document, DocumentDeserialize}; use crate::schema::{Schema, Term}; use crate::space_usage::SearcherSpaceUsage; use crate::store::{CacheStats, StoreReader}; @@ -84,7 +84,10 @@ impl Searcher { /// /// The searcher uses the segment ordinal to route the /// request to the right `Segment`. - pub fn doc(&self, doc_address: DocAddress) -> crate::Result { + pub fn doc( + &self, + doc_address: DocAddress, + ) -> crate::Result { let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize]; store_reader.get(doc_address.doc_id) } @@ -104,7 +107,10 @@ impl Searcher { /// Fetches a document in an asynchronous manner. #[cfg(feature = "quickwit")] - pub async fn doc_async(&self, doc_address: DocAddress) -> crate::Result { + pub async fn doc_async( + &self, + doc_address: DocAddress, + ) -> crate::Result { let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize]; store_reader.get_async(doc_address.doc_id).await } diff --git a/src/schema/document/mod.rs b/src/schema/document/mod.rs index a39251a334..ba48409153 100644 --- a/src/schema/document/mod.rs +++ b/src/schema/document/mod.rs @@ -174,7 +174,7 @@ pub use self::value::{ReferenceValue, Value}; use super::*; /// The core trait representing a document within the index. -pub trait Document: DocumentDeserialize + Send + Sync + 'static { +pub trait Document: Send + Sync + 'static { /// The value of the field. type Value<'a>: Value<'a> + Clone where Self: 'a; diff --git a/src/store/reader.rs b/src/store/reader.rs index 24017ef170..556449a031 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -14,7 +14,7 @@ use super::Decompressor; use crate::directory::FileSlice; use crate::error::DataCorruption; use crate::fastfield::AliveBitSet; -use crate::schema::document::{BinaryDocumentDeserializer, Document}; +use crate::schema::document::{BinaryDocumentDeserializer, Document, DocumentDeserialize}; use crate::space_usage::StoreSpaceUsage; use crate::store::index::Checkpoint; use crate::DocId; @@ -198,7 +198,7 @@ impl StoreReader { /// /// It should not be called to score documents /// for instance. - pub fn get(&self, doc_id: DocId) -> crate::Result { + pub fn get(&self, doc_id: DocId) -> crate::Result { let mut doc_bytes = self.get_document_bytes(doc_id)?; let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes) @@ -235,7 +235,7 @@ impl StoreReader { /// Iterator over all Documents in their order as they are stored in the doc store. /// Use this, if you want to extract all Documents from the doc store. /// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong. - pub fn iter<'a: 'b, 'b, D: Document>( + pub fn iter<'a: 'b, 'b, D: Document + DocumentDeserialize>( &'b self, alive_bitset: Option<&'a AliveBitSet>, ) -> impl Iterator> + 'b { @@ -370,7 +370,10 @@ impl StoreReader { } /// Fetches a document asynchronously. Async version of [`get`](Self::get). - pub async fn get_async(&self, doc_id: DocId) -> crate::Result { + pub async fn get_async( + &self, + doc_id: DocId, + ) -> crate::Result { let mut doc_bytes = self.get_document_bytes_async(doc_id).await?; let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes) From ce339fe4160bc45bdc42bf3159dd99591a3d521e Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 12 Oct 2023 17:22:46 +0800 Subject: [PATCH 2/2] remove unnecessary Document bounds --- src/core/searcher.rs | 7 ++----- src/store/reader.rs | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/core/searcher.rs b/src/core/searcher.rs index 2ee60b9dc0..c3b6d47aaf 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -84,10 +84,7 @@ impl Searcher { /// /// The searcher uses the segment ordinal to route the /// request to the right `Segment`. - pub fn doc( - &self, - doc_address: DocAddress, - ) -> crate::Result { + pub fn doc(&self, doc_address: DocAddress) -> crate::Result { let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize]; store_reader.get(doc_address.doc_id) } @@ -107,7 +104,7 @@ impl Searcher { /// Fetches a document in an asynchronous manner. #[cfg(feature = "quickwit")] - pub async fn doc_async( + pub async fn doc_async( &self, doc_address: DocAddress, ) -> crate::Result { diff --git a/src/store/reader.rs b/src/store/reader.rs index 556449a031..16125a1475 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -198,7 +198,7 @@ impl StoreReader { /// /// It should not be called to score documents /// for instance. - pub fn get(&self, doc_id: DocId) -> crate::Result { + pub fn get(&self, doc_id: DocId) -> crate::Result { let mut doc_bytes = self.get_document_bytes(doc_id)?; let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes) @@ -370,10 +370,7 @@ impl StoreReader { } /// Fetches a document asynchronously. Async version of [`get`](Self::get). - pub async fn get_async( - &self, - doc_id: DocId, - ) -> crate::Result { + pub async fn get_async(&self, doc_id: DocId) -> crate::Result { let mut doc_bytes = self.get_document_bytes_async(doc_id).await?; let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)