Skip to content

Commit

Permalink
Refactor Retrievers (#1138)
Browse files Browse the repository at this point in the history
Refactor retrievers into E-RAG style.
The folder structure after refactoring is as follows:

- retrievers
    - deployment
        - docker_compose
        - kubernetes
    - src
        - integrations
            - redis.py
            - milvus.py
            - ...
        - Dockerfile
        - opea_retrievers_microservice.py
        - README.md
        - requirements.txt
        - set_env.sh

#1004

Signed-off-by: letonghan <[email protected]>
  • Loading branch information
letonghan authored Jan 16, 2025
1 parent 6d07a06 commit c49db2b
Show file tree
Hide file tree
Showing 111 changed files with 1,202 additions and 4,022 deletions.
48 changes: 0 additions & 48 deletions .github/workflows/docker/compose/retrievers-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,51 +7,3 @@ services:
build:
dockerfile: comps/retrievers/src/Dockerfile
image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
retriever-redis:
build:
dockerfile: comps/retrievers/redis/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
retriever-qdrant:
build:
dockerfile: comps/retrievers/qdrant/haystack/Dockerfile
image: ${REGISTRY:-opea}/retriever-qdrant:${TAG:-latest}
retriever-vdms:
build:
dockerfile: comps/retrievers/vdms/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-vdms:${TAG:-latest}
retriever-pgvector:
build:
dockerfile: comps/retrievers/pgvector/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-pgvector:${TAG:-latest}
retriever-pinecone:
build:
dockerfile: comps/retrievers/pinecone/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-pinecone:${TAG:-latest}
retriever-milvus:
build:
dockerfile: comps/retrievers/milvus/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-milvus:${TAG:-latest}
retriever-redis-llamaindex:
build:
dockerfile: comps/retrievers/redis/llama_index/Dockerfile
image: ${REGISTRY:-opea}/retriever-redis-llamaindex:${TAG:-latest}
retriever-pathway:
build:
dockerfile: comps/retrievers/pathway/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-pathway:${TAG:-latest}
retriever-neo4j:
build:
dockerfile: comps/retrievers/neo4j/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-neo4j:${TAG:-latest}
retriever-neo4j-llamaindex:
build:
dockerfile: comps/retrievers/neo4j/llama_index/Dockerfile
image: ${REGISTRY:-opea}/retriever-neo4j-llamaindex:${TAG:-latest}
retriever-elasticsearch:
build:
dockerfile: comps/retrievers/elasticsearch/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-elasticsearch:${TAG:-latest}
retriever-opensearch:
build:
dockerfile: comps/retrievers/opensearch/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-opensearch:${TAG:-latest}
22 changes: 11 additions & 11 deletions comps/dataprep/neo4j/llama_index/extract_graph_neo4j.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,6 @@
import networkx as nx
import openai
import requests
from config import (
NEO4J_PASSWORD,
NEO4J_URL,
NEO4J_USERNAME,
OPENAI_API_KEY,
OPENAI_EMBEDDING_MODEL,
OPENAI_LLM_MODEL,
TEI_EMBEDDING_ENDPOINT,
TGI_LLM_ENDPOINT,
host_ip,
)
from fastapi import File, Form, HTTPException, UploadFile
from graspologic.partition import hierarchical_leiden
from langchain.text_splitter import RecursiveCharacterTextSplitter
Expand All @@ -43,6 +32,17 @@
from transformers import AutoTokenizer

from comps import CustomLogger, DocPath, opea_microservices, register_microservice
from comps.dataprep.neo4j.llama_index.config import (
NEO4J_PASSWORD,
NEO4J_URL,
NEO4J_USERNAME,
OPENAI_API_KEY,
OPENAI_EMBEDDING_MODEL,
OPENAI_LLM_MODEL,
TEI_EMBEDDING_ENDPOINT,
TGI_LLM_ENDPOINT,
host_ip,
)
from comps.dataprep.src.utils import (
document_loader,
encode_filename,
Expand Down
31 changes: 0 additions & 31 deletions comps/retrievers/README.md

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:
shm_size: 1g
command: --model-id ${RETRIEVE_MODEL_ID}
retriever:
image: opea/retriever-elasticsearch:latest
image: opea/retriever:latest
container_name: retriever-elasticsearch
ports:
- "7000:7000"
Expand Down
94 changes: 94 additions & 0 deletions comps/retrievers/deployment/docker_compose/retrievers_milvus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Compose stack for the Milvus-backed retriever: a standalone Milvus instance
# with the etcd and MinIO containers it is pointed at, a TEI embedding server,
# and the OPEA retriever microservice.

version: '3.5'

services:
  # etcd instance that milvus-standalone reaches through ETCD_ENDPOINTS.
  milvus-etcd:
    container_name: milvus-etcd
    image: quay.io/coreos/etcd:v3.5.5
    environment:
      - ETCD_AUTO_COMPACTION_MODE=revision
      - ETCD_AUTO_COMPACTION_RETENTION=1000
      - ETCD_QUOTA_BACKEND_BYTES=4294967296
      - ETCD_SNAPSHOT_COUNT=50000
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
    healthcheck:
      test: ["CMD", "etcdctl", "endpoint", "health"]
      interval: 30s
      timeout: 20s
      retries: 3

  # MinIO instance that milvus-standalone reaches through MINIO_ADDRESS.
  milvus-minio:
    container_name: milvus-minio
    image: minio/minio:RELEASE.2023-03-20T20-16-18Z
    environment:
      MINIO_ACCESS_KEY: minioadmin
      MINIO_SECRET_KEY: minioadmin
    ports:
      # Host 5044 -> MinIO console (the command below binds it to :9001).
      - "5044:9001"
      # Host 5043 -> container port 9000 (the port the healthcheck probes).
      - "5043:9000"
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
    command: minio server /minio_data --console-address ":9001"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 30s
      timeout: 20s
      retries: 3

  milvus-standalone:
    container_name: milvus-standalone
    image: milvusdb/milvus:v2.4.9
    command: ["milvus", "run", "standalone"]
    security_opt:
      - seccomp:unconfined
    environment:
      # These hostnames must match the service names defined above; the bare
      # names "etcd" and "minio" do not exist on this compose network.
      ETCD_ENDPOINTS: milvus-etcd:2379
      MINIO_ADDRESS: milvus-minio:9000
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
      # NOTE(review): expects a milvus.yaml file next to this compose file (or
      # under DOCKER_VOLUME_DIRECTORY) -- confirm it is provided before startup.
      - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
      interval: 30s
      start_period: 90s
      timeout: 20s
      retries: 3
    ports:
      - "19530:19530"
      - "9091:9091"
    depends_on:
      # depends_on entries must name services declared in this file.
      - "milvus-etcd"
      - "milvus-minio"

  # Text Embeddings Inference server; serves the model given by
  # RETRIEVE_MODEL_ID and is published on host port 6060.
  tei_xeon_service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-xeon-server
    ports:
      - "6060:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    command: --model-id ${RETRIEVE_MODEL_ID}

  # OPEA retriever microservice, published on port 7000.
  retriever:
    image: opea/retriever:latest
    container_name: retriever-milvus-server
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MILVUS_HOST: ${MILVUS_HOST}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      # Presumably selects the Milvus integration inside the unified retriever
      # image -- verify against opea_retrievers_microservice.py.
      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MILVUS"
      LOGFLAG: ${LOGFLAG}
    restart: unless-stopped

networks:
  default:
    name: milvus
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,14 @@ services:
- tgi-gaudi-service
- tei-embedding-service
ports:
- "6004:6004"
- "5000:5000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
host_ip: ${host_ip}
NEO4J_URL: ${NEO4J_URL}
NEO4J_URI: ${NEO4J_URL}
NEO4J_USERNAME: ${NEO4J_USERNAME}
NEO4J_PASSWORD: ${NEO4J_PASSWORD}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
Expand All @@ -100,14 +100,14 @@ services:
- tgi-gaudi-service
- tei-embedding-service
ports:
- "6009:6009"
- "7000:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
host_ip: ${host_ip}
NEO4J_URL: ${NEO4J_URL}
NEO4J_URI: ${NEO4J_URL}
NEO4J_USERNAME: ${NEO4J_USERNAME}
NEO4J_PASSWORD: ${NEO4J_PASSWORD}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:
shm_size: 1g
command: --model-id ${RETRIEVE_MODEL_ID}
retriever:
image: opea/retriever-opensearch-server
image: opea/retriever:latest
container_name: retriever-opensearch-server
ports:
- "7000:7000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:
shm_size: 1g
command: --model-id ${RETRIEVE_MODEL_ID}
retriever:
image: opea/retriever-pathway:latest
image: opea/retriever:latest
container_name: retriever-pathway-server
ports:
- "7000:7000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:
shm_size: 1g
command: --model-id ${RETRIEVE_MODEL_ID}
retriever:
image: opea/retriever-pgvector:latest
image: opea/retriever:latest
container_name: retriever-pgvector
ports:
- "7000:7000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:
shm_size: 1g
command: --model-id ${RETRIEVE_MODEL_ID}
retriever:
image: opea/retriever-pinecone:latest
image: opea/retriever:latest
container_name: retriever-pinecone-server
ports:
- "7000:7000"
Expand Down
34 changes: 34 additions & 0 deletions comps/retrievers/deployment/docker_compose/retrievers_qdrant.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Compose stack for the Qdrant-backed retriever: one Qdrant container plus the
# OPEA retriever microservice, joined by the default bridge network.

version: "3.8"

services:
  # Qdrant server, published on host port 6333.
  qdrant-server:
    # NOTE(review): no tag is given, so Docker resolves this to :latest.
    image: qdrant/qdrant
    container_name: qdrant-db-server
    ports:
      - "6333:6333"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}

  # OPEA retriever microservice, published on port 7000.
  retriever:
    image: opea/retriever:latest
    container_name: retriever-qdrant-server
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      # Connection settings are taken from the caller's environment.
      QDRANT_HOST: ${QDRANT_HOST}
      QDRANT_PORT: ${QDRANT_PORT}
      INDEX_NAME: ${INDEX_NAME}
      # Presumably selects the Qdrant integration inside the unified retriever
      # image -- verify against opea_retrievers_microservice.py.
      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_QDRANT"
    restart: unless-stopped

networks:
  default:
    driver: bridge
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:
shm_size: 1g
command: --model-id ${RETRIEVE_MODEL_ID}
retriever:
image: opea/retriever-redis:latest
image: opea/retriever:latest
container_name: retriever-redis-server
ports:
- "7000:7000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:
shm_size: 1g
command: --model-id ${RETRIEVE_MODEL_ID}
retriever:
image: opea/retriever-vdms:latest
image: opea/retriever:latest
container_name: retriever-vdms-server
ports:
- "7000:7000"
Expand Down
28 changes: 0 additions & 28 deletions comps/retrievers/elasticsearch/langchain/Dockerfile

This file was deleted.

Loading

0 comments on commit c49db2b

Please sign in to comment.