-
Notifications
You must be signed in to change notification settings - Fork 311
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c9be2c5
commit ddd551c
Showing
9 changed files
with
255 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# import asyncio | ||
# import logging | ||
# from typing import Any, AsyncGenerator, Optional, Union | ||
|
||
# from core.base import ( | ||
# AsyncPipe, | ||
# PipeType, | ||
# R2RLoggingProvider, | ||
# ) | ||
|
||
|
||
|
||
# class ChunkEnrichmentPipe(AsyncPipe): | ||
# """ | ||
# Enriches chunks using a specified embedding model. | ||
# """ | ||
|
||
# class Input(AsyncPipe.Input): | ||
# message: list[DocumentExtraction] | ||
|
||
|
||
# def __init__(self, config: AsyncPipe.PipeConfig, type: PipeType = PipeType.INGESTOR, pipe_logger: Optional[R2RLoggingProvider] = None): | ||
# super().__init__(config, type, pipe_logger) | ||
|
||
# async def run(self, input: Input, state: Optional[AsyncState] = None, run_manager: Optional[RunManager] = None) -> AsyncGenerator[DocumentExtraction, None]: | ||
# pass | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
chunk_enrichment: | ||
template: > | ||
## Task: | ||
You are given a chunk of text. Your task is to enrich it with additional context from additional chunks that form the context of the chunk. | ||
Please make sure that the additional context you provide is relevant to the chunk. | ||
## Context Chunks: | ||
{context_chunks} | ||
## Chunk: | ||
{chunk} | ||
Note that: | ||
- You will make the chunk extremely precise and useful | ||
## Response: | ||
input_types: | ||
chunk: str | ||
context_chunks: str |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Abstractions for ingestion | ||
|
||
from enum import Enum | ||
|
||
from pydantic import Field | ||
|
||
from .base import R2RSerializable | ||
from .llm import GenerationConfig | ||
|
||
|
||
class ChunkEnrichmentStrategy(str, Enum):
    """Names of the strategies available for gathering enrichment context.

    Members also subclass ``str``, so each one compares equal to its raw
    string value (e.g. ``ChunkEnrichmentStrategy.SEMANTIC == "semantic"``).
    """

    SEMANTIC = "semantic"
    NEIGHBORHOOD = "neighborhood"

    def __str__(self) -> str:
        """Return the member's raw string value instead of ``Class.MEMBER``."""
        return self.value
|
||
class ChunkEnrichmentSettings(R2RSerializable):
    """
    Settings for chunk enrichment.

    Each field carries its own ``description``; see the ``Field`` definitions
    below for the meaning and default of every option.
    """

    # Which strategies contribute context chunks; empty by default.
    strategies: list[ChunkEnrichmentStrategy] = Field(
        default=[],
        description="The strategies to use for chunk enrichment. Union of chunks obtained from each strategy is used as context.",
    )
    # NOTE(review): the original descriptions of forward_chunks and
    # backward_chunks contradicted the field names ("forward" was described
    # as "before", "backward" as "after"). They are aligned with the names
    # here — confirm against the code that consumes these settings.
    forward_chunks: int = Field(
        default=3,
        description="The number of chunks to include after the current chunk",
    )
    backward_chunks: int = Field(
        default=3,
        description="The number of chunks to include before the current chunk",
    )
    semantic_neighbors: int = Field(
        default=10,
        description="The number of semantic neighbors to include",
    )
    semantic_similarity_threshold: float = Field(
        default=0.7,
        description="The similarity threshold for semantic neighbors",
    )
    # The default instance is built once, when this class body is evaluated.
    generation_config: GenerationConfig = Field(
        default=GenerationConfig(),
        description="The generation config to use for chunk enrichment",
    )