diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh b/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh index 16e26851fa..6854fb4d67 100644 --- a/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh +++ b/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh @@ -40,8 +40,8 @@ export EMBEDDING_SERVICE_HOST_IP=${host_ip} export RETRIEVER_SERVICE_HOST_IP=${host_ip} export RERANK_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete" docker compose -f compose.yaml up -d diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/set_env.sh b/AgentQnA/docker_compose/amd/gpu/rocm/set_env.sh index 092a1ba3f7..a2f218c3da 100644 --- a/AgentQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/AgentQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -41,6 +41,6 @@ export EMBEDDING_SERVICE_HOST_IP=${host_ip} export RETRIEVER_SERVICE_HOST_IP=${host_ip} export RERANK_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete" diff --git a/AgentQnA/retrieval_tool/index_data.py b/AgentQnA/retrieval_tool/index_data.py index 410a83cfb1..ecf4fb0a06 100644 --- a/AgentQnA/retrieval_tool/index_data.py +++ b/AgentQnA/retrieval_tool/index_data.py @@ -53,7 +53,7 @@ def main(): host_ip = args.host_ip port = args.port proxies = {"http": ""} - url = "http://{host_ip}:{port}/v1/dataprep".format(host_ip=host_ip, port=port) + url = "http://{host_ip}:{port}/v1/dataprep/ingest".format(host_ip=host_ip, port=port) # Split jsonl file into json files files = split_jsonl_into_txts(os.path.join(args.filedir, args.filename)) diff --git a/AgentQnA/retrieval_tool/launch_retrieval_tool.sh b/AgentQnA/retrieval_tool/launch_retrieval_tool.sh index b0c22fea41..40ff1ff62a 100644 --- a/AgentQnA/retrieval_tool/launch_retrieval_tool.sh +++ b/AgentQnA/retrieval_tool/launch_retrieval_tool.sh @@ -19,8 +19,8 @@ export EMBEDDING_SERVICE_HOST_IP=${host_ip} export RETRIEVER_SERVICE_HOST_IP=${host_ip} export RERANK_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get" +export 
DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete" docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml up -d diff --git a/AgentQnA/tests/step1_build_images.sh b/AgentQnA/tests/step1_build_images.sh index 4782da677a..8dc3a132b2 100644 --- a/AgentQnA/tests/step1_build_images.sh +++ b/AgentQnA/tests/step1_build_images.sh @@ -21,7 +21,7 @@ function build_docker_images_for_retrieval_tool(){ # git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ get_genai_comps echo "Build all the images with --no-cache..." - service_list="doc-index-retriever dataprep-redis embedding retriever reranking" + service_list="doc-index-retriever dataprep embedding retriever reranking" docker compose -f build.yaml build ${service_list} --no-cache docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 diff --git a/ChatQnA/README.md b/ChatQnA/README.md index 728267197e..37fafc358c 100644 --- a/ChatQnA/README.md +++ b/ChatQnA/README.md @@ -202,8 +202,8 @@ Gaudi default compose.yaml | Embedding | Langchain | Xeon | 6000 | /v1/embeddings | | Retriever | Langchain, Redis | Xeon | 7000 | /v1/retrieval | | Reranking | Langchain, TEI | Gaudi | 8000 | /v1/reranking | -| LLM | Langchain, TGI | Gaudi | 9000 | /v1/chat/completions | -| Dataprep | Redis, Langchain | Xeon | 6007 | /v1/dataprep | +| LLM | Langchain, vLLM | Gaudi | 9000 | /v1/chat/completions | +| Dataprep | Redis, Langchain | Xeon | 6007 | /v1/dataprep/ingest | ### Required Models @@ -294,7 +294,7 @@ Here is an example of `Nike 2023` pdf. # download pdf file wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf # upload pdf file with dataprep -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` diff --git a/ChatQnA/benchmark/accuracy/README.md b/ChatQnA/benchmark/accuracy/README.md index c073139486..ddb4c58aae 100644 --- a/ChatQnA/benchmark/accuracy/README.md +++ b/ChatQnA/benchmark/accuracy/README.md @@ -72,14 +72,14 @@ python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_ If you are using Kubernetes manifest/helm to deploy `ChatQnA` system, you must specify more arguments as following: ```bash -python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --tei_embedding_endpoint http://{your_tei_embedding_ip}:{your_tei_embedding_port} --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna +python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep/ingest --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --tei_embedding_endpoint 
http://{your_tei_embedding_ip}:{your_tei_embedding_port} --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna ``` The default values for arguments are: |Argument|Default value| |--------|-------------| |service_url|http://localhost:8888/v1/chatqna| -|database_endpoint|http://localhost:6007/v1/dataprep| +|database_endpoint|http://localhost:6007/v1/dataprep/ingest| |embedding_endpoint|http://localhost:6000/v1/embeddings| |tei_embedding_endpoint|http://localhost:8090| |retrieval_endpoint|http://localhost:7000/v1/retrieval| @@ -139,14 +139,14 @@ python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/8 If you are using Kubernetes manifest/helm to deploy `ChatQnA` system, you must specify more arguments as following: ```bash -python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna +python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep/ingest --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna ``` The default values for arguments are: |Argument|Default value| |--------|-------------| |service_url|http://localhost:8888/v1/chatqna| -|database_endpoint|http://localhost:6007/v1/dataprep| +|database_endpoint|http://localhost:6007/v1/dataprep/ingest| |embedding_endpoint|http://localhost:6000/v1/embeddings| |retrieval_endpoint|http://localhost:7000/v1/retrieval| |reranking_endpoint|http://localhost:8000/v1/reranking| diff --git a/ChatQnA/benchmark/accuracy/eval_crud.py b/ChatQnA/benchmark/accuracy/eval_crud.py index f6e3e25a0a..b801e4f8a7 100644 --- a/ChatQnA/benchmark/accuracy/eval_crud.py +++ b/ChatQnA/benchmark/accuracy/eval_crud.py @@ -149,7 +149,7 @@ def args_parser(): parser.add_argument("--tasks", default=["question_answering"], nargs="+", help="Task to perform") parser.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database") parser.add_argument( - "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address." + "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep/ingest", help="Service URL address." ) parser.add_argument( "--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address." 
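Both accuracy scripts now default `--database_endpoint` to the renamed ingest route. Before a long eval run it is worth confirming the route answers; a minimal sketch, assuming the default compose mapping that publishes dataprep on host port 6007 (the file name below is purely illustrative):

```bash
# Quick reachability check for the renamed ingest route; assumes dataprep
# is published on host port 6007 as in the compose files in this PR.
echo "sample text for ingestion" > /tmp/dataprep_check.txt
curl -X POST "http://localhost:6007/v1/dataprep/ingest" \
  -H "Content-Type: multipart/form-data" \
  -F "files=@/tmp/dataprep_check.txt"
# A healthy service replies with "Data preparation succeeded".
```

The CI scripts further down validate the same route and expect the same response string.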
diff --git a/ChatQnA/benchmark/accuracy/eval_multihop.py b/ChatQnA/benchmark/accuracy/eval_multihop.py index a8f2b9911a..3bede38036 100644 --- a/ChatQnA/benchmark/accuracy/eval_multihop.py +++ b/ChatQnA/benchmark/accuracy/eval_multihop.py @@ -211,7 +211,7 @@ def args_parser(): parser.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.") parser.add_argument("--limits", type=int, default=100, help="Number of examples to be evaluated by llm-as-judge") parser.add_argument( - "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address." + "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep/ingest", help="Service URL address." ) parser.add_argument( "--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address." diff --git a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md index abd5aa4f72..a59dce5a7a 100644 --- a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md +++ b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md @@ -164,7 +164,7 @@ Use the following `cURL` command to upload file: ```bash cd GenAIEval/evals/benchmark/data -curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \ +curl -X POST "http://${cluster_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "chunk_size=3800" \ -F "files=@./upload_file.txt" diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/README.md b/ChatQnA/docker_compose/amd/gpu/rocm/README.md index b3a5069ab1..eadfc2f5d4 100644 --- a/ChatQnA/docker_compose/amd/gpu/rocm/README.md +++ b/ChatQnA/docker_compose/amd/gpu/rocm/README.md @@ -65,7 +65,7 @@ Prepare and upload test document # download pdf file wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf # upload pdf file with dataprep -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` @@ -100,7 +100,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_ ### 3. Build Dataprep Image ```bash -docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 4. Build MegaService Docker Image @@ -144,7 +144,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a Then run the command `docker images`, you will have the following 5 Docker Images: 1. `opea/retriever:latest` -2. `opea/dataprep-redis:latest` +2. `opea/dataprep:latest` 3. `opea/chatqna:latest` 4. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest` 5. `opea/nginx:latest` @@ -192,9 +192,9 @@ Change the `xxx_MODEL_ID` below for your needs. 
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP} export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna" - export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep" - export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get_file" - export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete_file" + export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest" + export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get" + export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete" export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP} export CHATQNA_FRONTEND_SERVICE_PORT=5173 export CHATQNA_BACKEND_SERVICE_NAME=chatqna @@ -331,7 +331,7 @@ If you want to update the default knowledge base, you can use the following comm Update Knowledge Base via Local File Upload: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` @@ -341,7 +341,7 @@ This command updates a knowledge base by uploading a local file for processing. Add Knowledge Base via HTTP Links: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]' ``` @@ -351,7 +351,7 @@ This command updates a knowledge base by submitting a list of HTTP links for pro Also, you are able to get the file list that you uploaded: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \ -H "Content-Type: application/json" ``` @@ -359,17 +359,17 @@ To delete the file/link you uploaded: ```bash # delete link -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "https://opea.dev"}' \ -H "Content-Type: application/json" # delete file -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "nke-10k-2023.pdf"}' \ -H "Content-Type: application/json" # delete all uploaded files and links -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "all"}' \ -H "Content-Type: application/json" ``` diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml b/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml index 1a7b9ad9b4..421d2abacd 100644 --- a/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml +++ b/ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml @@ -9,13 +9,13 @@ services: - "${CHATQNA_REDIS_VECTOR_PORT}:6379" - "${CHATQNA_REDIS_VECTOR_INSIGHT_PORT}:8001" chatqna-dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - chatqna-redis-vector-db - chatqna-tei-embedding-service ports: - - "${CHATQNA_REDIS_DATAPREP_PORT}:6007" + - "${CHATQNA_REDIS_DATAPREP_PORT}:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git 
a/ChatQnA/docker_compose/amd/gpu/rocm/set_env.sh b/ChatQnA/docker_compose/amd/gpu/rocm/set_env.sh index 0a581dc867..8071ebdd99 100644 --- a/ChatQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/ChatQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -19,9 +19,9 @@ export CHATQNA_INDEX_NAME="rag-redis" export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP} export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna" -export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep" -export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get_file" -export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete_file" +export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest" +export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get" +export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete" export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP} export CHATQNA_FRONTEND_SERVICE_PORT=15173 export CHATQNA_BACKEND_SERVICE_NAME=chatqna diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/README.md b/ChatQnA/docker_compose/intel/cpu/aipc/README.md index 9297cff2e5..201819e656 100644 --- a/ChatQnA/docker_compose/intel/cpu/aipc/README.md +++ b/ChatQnA/docker_compose/intel/cpu/aipc/README.md @@ -27,7 +27,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_ ### 2. Build Dataprep Image ```bash -docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . cd .. ``` @@ -60,7 +60,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a Then run the command `docker images`, you will have the following Docker Images: -1. `opea/dataprep-redis:latest` +1. `opea/dataprep:latest` 2. `opea/retriever:latest` 3. `opea/chatqna:latest` 4. `opea/chatqna-ui:latest` @@ -191,7 +191,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf # upload pdf file with dataprep -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` @@ -201,7 +201,7 @@ This command updates a knowledge base by uploading a local file for processing. 
Alternatively, you can add knowledge base via HTTP Links: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]' ``` @@ -211,7 +211,7 @@ This command updates a knowledge base by submitting a list of HTTP links for pro To check the uploaded files, you are able to get the file list that uploaded: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \ -H "Content-Type: application/json" ``` diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml index 7cc74b0f37..f765d3aa51 100644 --- a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml +++ b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml @@ -9,13 +9,13 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - redis-vector-db - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -146,7 +146,7 @@ services: - BACKEND_SERVICE_IP=chatqna-aipc-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-redis-service - - DATAPREP_SERVICE_PORT=6007 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md index 750cca5887..40610ad73d 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md @@ -113,7 +113,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_ ### 2. Build Dataprep Image ```bash -docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . cd .. ``` @@ -168,7 +168,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a Then run the command `docker images`, you will have the following 5 Docker Images: -1. `opea/dataprep-redis:latest` +1. `opea/dataprep:latest` 2. `opea/retriever:latest` 3. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest` 4. `opea/chatqna-ui:latest` @@ -372,7 +372,7 @@ wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrie Upload: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` @@ -382,7 +382,7 @@ This command updates a knowledge base by uploading a local file for processing. 
Add Knowledge Base via HTTP Links: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]' ``` @@ -392,7 +392,7 @@ This command updates a knowledge base by submitting a list of HTTP links for pro Also, you are able to get the file list that you uploaded: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \ -H "Content-Type: application/json" ``` @@ -417,21 +417,21 @@ Then you will get the response JSON like this. Notice that the returned `name`/` To delete the file/link you uploaded: -The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. +The `file_path` here should be the `id` get from `/v1/dataprep/get` API. ```bash # delete link -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "https://opea.dev.txt"}' \ -H "Content-Type: application/json" # delete file -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "nke-10k-2023.pdf"}' \ -H "Content-Type: application/json" # delete all uploaded files and links -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "all"}' \ -H "Content-Type: application/json" ``` diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md index c87a0a81cf..e7b564db2e 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md @@ -116,7 +116,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_ ### 2. Build Dataprep Image ```bash -docker build --no-cache -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . cd .. ``` @@ -171,7 +171,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a Then run the command `docker images`, you will have the following 5 Docker Images: -1. `opea/dataprep-pinecone:latest` +1. `opea/dataprep:latest` 2. `opea/retriever:latest` 3. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest` 4. `opea/chatqna-ui:latest` @@ -360,7 +360,7 @@ wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrie Upload: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` @@ -370,7 +370,7 @@ This command updates a knowledge base by uploading a local file for processing. 
Add Knowledge Base via HTTP Links: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]' ``` @@ -381,7 +381,7 @@ To delete the files/link you uploaded: ```bash # delete all uploaded files and links -curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6009/v1/dataprep/delete" \ -d '{"file_path": "all"}' \ -H "Content-Type: application/json" ``` diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md index a77ebf0f7d..c0b1734362 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md @@ -81,7 +81,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_ ### 2. Build Dataprep Image ```bash -docker build --no-cache -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . cd .. ``` @@ -115,7 +115,7 @@ Build frontend Docker image that enables Conversational experience with ChatQnA ```bash cd GenAIExamples/ChatQnA/ui export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep/ingest" docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . cd ../../../.. ``` @@ -129,7 +129,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a Then run the command `docker images`, you will have the following 5 Docker Images: -1. `opea/dataprep-qdrant:latest` +1. `opea/dataprep:latest` 2. `opea/retriever:latest` 3. `opea/chatqna:latest` 4. 
`opea/chatqna-ui:latest` @@ -275,7 +275,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v Update Knowledge Base via Local File Upload: ```bash - curl -X POST "http://${host_ip}:6043/v1/dataprep" \ + curl -X POST "http://${host_ip}:6043/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./your_file.pdf" ``` @@ -285,7 +285,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v Add Knowledge Base via HTTP Links: ```bash - curl -X POST "http://${host_ip}:6043/v1/dataprep" \ + curl -X POST "http://${host_ip}:6043/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]' ``` diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml index 544d40b579..3c3e6f49a7 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -9,13 +9,13 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - redis-vector-db - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -153,7 +153,7 @@ services: - BACKEND_SERVICE_IP=chatqna-xeon-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-redis-service - - DATAPREP_SERVICE_PORT=6007 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml index 5378b581ef..de784dfabd 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml @@ -6,12 +6,12 @@ version: "3.8" services: dataprep-pinecone-service: - image: ${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-pinecone-server depends_on: - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -21,6 +21,7 @@ services: TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_PINECONE" tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-server @@ -142,7 +143,7 @@ services: - BACKEND_SERVICE_IP=chatqna-xeon-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-pinecone-service - - DATAPREP_SERVICE_PORT=6007 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml index c3a2d00dc8..46123d3e90 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml @@ -9,13 +9,13 @@ services: - "6333:6333" - "6334:6334" dataprep-qdrant-service: - image: ${REGISTRY:-opea}/dataprep-qdrant:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-qdrant-server depends_on: - qdrant-vector-db - tei-embedding-service ports: - - "6043:6007" + - "6043:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -25,6 +25,7 @@ 
services: QDRANT_INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_QDRANT" tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-server @@ -148,7 +149,7 @@ services: - BACKEND_SERVICE_IP=chatqna-xeon-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-qdrant-service - - DATAPREP_SERVICE_PORT=6007 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml index 6e94a9f998..5831181370 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml @@ -9,13 +9,13 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - redis-vector-db - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -149,7 +149,7 @@ services: - BACKEND_SERVICE_IP=chatqna-xeon-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-redis-service - - DATAPREP_SERVICE_PORT=6007 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml index dd675dd0dd..917c6ee078 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml @@ -9,13 +9,13 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - redis-vector-db - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -130,7 +130,7 @@ services: - BACKEND_SERVICE_IP=chatqna-xeon-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-redis-service - - DATAPREP_SERVICE_PORT=6007 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md index aa0e150fb1..02e4182050 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md @@ -86,7 +86,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_ ### 2. Build Dataprep Image ```bash -docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 3. 
Build Guardrails Docker Image (Optional) @@ -159,7 +159,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a Then run the command `docker images`, you will have the following 5 Docker Images: - `opea/retriever:latest` -- `opea/dataprep-redis:latest` +- `opea/dataprep:latest` - `opea/chatqna:latest` - `opea/chatqna-ui:latest` - `opea/nginx:latest` @@ -376,7 +376,7 @@ If you want to update the default knowledge base, you can use the following comm Update Knowledge Base via Local File Upload: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` @@ -386,7 +386,7 @@ This command updates a knowledge base by uploading a local file for processing. Add Knowledge Base via HTTP Links: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]' ``` @@ -396,7 +396,7 @@ This command updates a knowledge base by submitting a list of HTTP links for pro Also, you are able to get the file/link list that you uploaded: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \ -H "Content-Type: application/json" ``` @@ -423,17 +423,17 @@ To delete the file/link you uploaded: ```bash # delete link -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "https://opea.dev.txt"}' \ -H "Content-Type: application/json" # delete file -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "nke-10k-2023.pdf"}' \ -H "Content-Type: application/json" # delete all uploaded files and links -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "all"}' \ -H "Content-Type: application/json" ``` diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 8c21325773..6cf9ce67ef 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -9,13 +9,13 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - redis-vector-db - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -161,7 +161,7 @@ services: - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-redis-service - - DATAPREP_SERVICE_PORT=6007 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml index 936be4045c..6118f74a37 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml @@ -9,13 +9,13 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: 
dataprep-redis-server depends_on: - redis-vector-db - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -201,7 +201,7 @@ services: - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-redis-service - - DATAPREP_SERVICE_PORT=6007 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml index cc75704aef..fd09988a91 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -9,13 +9,13 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - redis-vector-db - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -180,7 +180,7 @@ services: - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-redis-service - - DATAPREP_SERVICE_PORT=6007 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml index 8b800525e9..9afde50e46 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml @@ -9,13 +9,13 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - redis-vector-db - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -134,7 +134,7 @@ services: - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-redis-service - - DATAPREP_SERVICE_PORT=6007 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md index 3834d5b8cc..7d4e40b6aa 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md @@ -45,7 +45,7 @@ CONTAINER ID IMAGE COMMAND bee1132464cd opea/chatqna:latest "python chatqna.py" 2 minutes ago Up 2 minutes 0.0.0.0:8888->8888/tcp, :::8888->8888/tcp chatqna-gaudi-backend-server f810f3b4d329 opea/embedding:latest "python embedding_te…" 2 minutes ago Up 2 minutes 0.0.0.0:6000->6000/tcp, :::6000->6000/tcp embedding-server 325236a01f9b opea/llm-textgen:latest "python llm.py" 2 minutes ago Up 2 minutes 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp llm-textgen-gaudi-server -2fa17d84605f opea/dataprep-redis:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server +2fa17d84605f opea/dataprep:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->5000/tcp, :::6007->5000/tcp dataprep-redis-server 69e1fb59e92c opea/retriever:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server 
313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server 174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:1.5.0 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server diff --git a/ChatQnA/docker_compose/nvidia/gpu/README.md b/ChatQnA/docker_compose/nvidia/gpu/README.md index edf9dc12f4..b284ab47df 100644 --- a/ChatQnA/docker_compose/nvidia/gpu/README.md +++ b/ChatQnA/docker_compose/nvidia/gpu/README.md @@ -110,7 +110,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_ ### 3. Build Dataprep Image ```bash -docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 4. Build MegaService Docker Image @@ -154,7 +154,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a Then run the command `docker images`, you will have the following 5 Docker Images: 1. `opea/retriever:latest` -2. `opea/dataprep-redis:latest` +2. `opea/dataprep:latest` 3. `opea/chatqna:latest` 4. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest` 5. `opea/nginx:latest` @@ -314,7 +314,7 @@ If you want to update the default knowledge base, you can use the following comm Update Knowledge Base via Local File Upload: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` @@ -324,7 +324,7 @@ This command updates a knowledge base by uploading a local file for processing. 
Add Knowledge Base via HTTP Links: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]' ``` @@ -334,7 +334,7 @@ This command updates a knowledge base by submitting a list of HTTP links for pro Also, you are able to get the file list that you uploaded: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \ -H "Content-Type: application/json" ``` @@ -342,17 +342,17 @@ To delete the file/link you uploaded: ```bash # delete link -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "https://opea.dev"}' \ -H "Content-Type: application/json" # delete file -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "nke-10k-2023.pdf"}' \ -H "Content-Type: application/json" # delete all uploaded files and links -curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "all"}' \ -H "Content-Type: application/json" ``` diff --git a/ChatQnA/docker_compose/nvidia/gpu/compose.yaml b/ChatQnA/docker_compose/nvidia/gpu/compose.yaml index 40f45491c8..7684ca96a3 100644 --- a/ChatQnA/docker_compose/nvidia/gpu/compose.yaml +++ b/ChatQnA/docker_compose/nvidia/gpu/compose.yaml @@ -9,13 +9,13 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - redis-vector-db - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/ChatQnA/docker_compose/nvidia/gpu/set_env.sh b/ChatQnA/docker_compose/nvidia/gpu/set_env.sh index f97d07e20c..dd7421e683 100644 --- a/ChatQnA/docker_compose/nvidia/gpu/set_env.sh +++ b/ChatQnA/docker_compose/nvidia/gpu/set_env.sh @@ -12,9 +12,9 @@ export INDEX_NAME="rag-redis" export MEGA_SERVICE_HOST_IP=${host_ip} export RETRIEVER_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete" export FRONTEND_SERVICE_IP=${host_ip} export FRONTEND_SERVICE_PORT=5173 export BACKEND_SERVICE_NAME=chatqna diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 7ae42b6029..71cea18109 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -65,24 +65,12 @@ services: dockerfile: comps/llms/src/text-generation/Dockerfile extends: chatqna image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} - dataprep-redis: + dataprep: build: context: GenAIComps - dockerfile: comps/dataprep/redis/langchain/Dockerfile + dockerfile: comps/dataprep/src/Dockerfile extends: chatqna - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} - 
dataprep-qdrant: - build: - context: GenAIComps - dockerfile: comps/dataprep/qdrant/langchain/Dockerfile - extends: chatqna - image: ${REGISTRY:-opea}/dataprep-qdrant:${TAG:-latest} - dataprep-pinecone: - build: - context: GenAIComps - dockerfile: comps/dataprep/pinecone/langchain/Dockerfile - extends: chatqna - image: ${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} guardrails: build: context: GenAIComps diff --git a/ChatQnA/kubernetes/gmc/chatQnA_dataprep_gaudi.yaml b/ChatQnA/kubernetes/gmc/chatQnA_dataprep_gaudi.yaml index 85844c7c74..45c333e379 100644 --- a/ChatQnA/kubernetes/gmc/chatQnA_dataprep_gaudi.yaml +++ b/ChatQnA/kubernetes/gmc/chatQnA_dataprep_gaudi.yaml @@ -70,7 +70,7 @@ spec: internalService: serviceName: data-prep-svc config: - endpoint: /v1/dataprep + endpoint: /v1/dataprep/ingest REDIS_URL: redis-vector-db TEI_ENDPOINT: tei-embedding-gaudi-svc isDownstreamService: true diff --git a/ChatQnA/kubernetes/gmc/chatQnA_dataprep_xeon.yaml b/ChatQnA/kubernetes/gmc/chatQnA_dataprep_xeon.yaml index 64265e99fa..137c5d1aeb 100644 --- a/ChatQnA/kubernetes/gmc/chatQnA_dataprep_xeon.yaml +++ b/ChatQnA/kubernetes/gmc/chatQnA_dataprep_xeon.yaml @@ -70,7 +70,7 @@ spec: internalService: serviceName: data-prep-svc config: - endpoint: /v1/dataprep + endpoint: /v1/dataprep/ingest REDIS_URL: redis-vector-db TEI_ENDPOINT: tei-embedding-svc isDownstreamService: true diff --git a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh index f36882a82c..22de3e78ac 100644 --- a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh @@ -20,7 +20,7 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork && git checkout v0.6.4.post2+Gaudi-1.19.0 && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna-guardrails chatqna-ui dataprep-redis retriever vllm-gaudi guardrails nginx" + service_list="chatqna-guardrails chatqna-ui dataprep retriever vllm-gaudi guardrails nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 diff --git a/ChatQnA/tests/test_compose_on_gaudi.sh b/ChatQnA/tests/test_compose_on_gaudi.sh index f352a75d9b..099f032bcb 100644 --- a/ChatQnA/tests/test_compose_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_on_gaudi.sh @@ -20,7 +20,7 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork && git checkout v0.6.4.post2+Gaudi-1.19.0 && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="chatqna chatqna-ui dataprep-redis retriever vllm-gaudi nginx" + service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 diff --git a/ChatQnA/tests/test_compose_on_rocm.sh b/ChatQnA/tests/test_compose_on_rocm.sh index f7d2de19d3..e73a0aef58 100644 --- a/ChatQnA/tests/test_compose_on_rocm.sh +++ b/ChatQnA/tests/test_compose_on_rocm.sh @@ -31,9 +31,9 @@ export CHATQNA_INDEX_NAME="rag-redis" export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP} export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna" -export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep" -export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get_file" -export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete_file" +export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest" +export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get" +export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete" export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP} export CHATQNA_FRONTEND_SERVICE_PORT=15173 export CHATQNA_BACKEND_SERVICE_NAME=chatqna @@ -64,7 +64,7 @@ function build_docker_images() { git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep-redis retriever nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}"/docker_image_build.log docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm @@ -147,31 +147,31 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time - # test /v1/dataprep upload file + # test /v1/dataprep/ingest upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> "$LOG_PATH"/dataprep_file.txt validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ "dataprep-redis-server" - # test /v1/dataprep upload link + # test /v1/dataprep/ingest upload link validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ "dataprep-redis-server" - # test /v1/dataprep/get_file + # test /v1/dataprep/get validate_service \ - "http://${ip_address}:6007/v1/dataprep/get_file" \ + "http://${ip_address}:6007/v1/dataprep/get" \ '{"name":' \ "dataprep_get" \ "dataprep-redis-server" - # test /v1/dataprep/delete_file + # test /v1/dataprep/delete validate_service \ - "http://${ip_address}:6007/v1/dataprep/delete_file" \ + "http://${ip_address}:6007/v1/dataprep/delete" \ '{"status":true}' \ "dataprep_del" \ "dataprep-redis-server" diff --git a/ChatQnA/tests/test_compose_on_xeon.sh b/ChatQnA/tests/test_compose_on_xeon.sh index a9d4373979..80c74f1a39 100644 --- a/ChatQnA/tests/test_compose_on_xeon.sh +++ b/ChatQnA/tests/test_compose_on_xeon.sh @@ -20,7 +20,7 @@ function build_docker_images() { git clone https://github.com/vllm-project/vllm.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep-redis retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever vllm nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index d525cdb80a..4f6dd1158e 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -20,7 +20,7 @@ function build_docker_images() { git clone https://github.com/vllm-project/vllm.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep-pinecone retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever vllm nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 @@ -110,18 +110,18 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time - # test /v1/dataprep/delete_file + # test /v1/dataprep/delete validate_service \ - "http://${ip_address}:6007/v1/dataprep/delete_file" \ + "http://${ip_address}:6007/v1/dataprep/delete" \ '{"status":true}' \ "dataprep_del" \ "dataprep-pinecone-server" - # test /v1/dataprep upload file + # test /v1/dataprep/ingest upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ "dataprep-pinecone-server" diff --git a/ChatQnA/tests/test_compose_qdrant_on_xeon.sh b/ChatQnA/tests/test_compose_qdrant_on_xeon.sh index 299a7def1a..d0ad922fd1 100644 --- a/ChatQnA/tests/test_compose_qdrant_on_xeon.sh +++ b/ChatQnA/tests/test_compose_qdrant_on_xeon.sh @@ -20,7 +20,7 @@ function build_docker_images() { git clone https://github.com/vllm-project/vllm.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep-qdrant retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever vllm nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -99,17 +99,17 @@ function validate_microservices() { "tei-embedding-server" \ '{"inputs":"What is Deep Learning?"}' - # test /v1/dataprep upload file + # test /v1/dataprep/ingest upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt validate_service \ - "${ip_address}:6043/v1/dataprep" \ + "${ip_address}:6043/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ "dataprep-qdrant-server" # test upload link validate_service \ - "${ip_address}:6043/v1/dataprep" \ + "${ip_address}:6043/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ "dataprep-qdrant-server" diff --git a/ChatQnA/tests/test_compose_tgi_on_gaudi.sh b/ChatQnA/tests/test_compose_tgi_on_gaudi.sh index e20cde38a3..303df2b61c 100644 --- a/ChatQnA/tests/test_compose_tgi_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_tgi_on_gaudi.sh @@ -31,7 +31,7 @@ function build_docker_images() { git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep-redis retriever nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 @@ -124,28 +124,28 @@ function validate_microservices() { # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ "dataprep-redis-server" # test /v1/dataprep upload link validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ "dataprep-redis-server" # test /v1/dataprep/get_file validate_service \ - "http://${ip_address}:6007/v1/dataprep/get_file" \ + "http://${ip_address}:6007/v1/dataprep/get" \ '{"name":' \ "dataprep_get" \ "dataprep-redis-server" # test /v1/dataprep/delete_file validate_service \ - "http://${ip_address}:6007/v1/dataprep/delete_file" \ + "http://${ip_address}:6007/v1/dataprep/delete" \ '{"status":true}' \ "dataprep_del" \ "dataprep-redis-server" diff --git a/ChatQnA/tests/test_compose_tgi_on_xeon.sh b/ChatQnA/tests/test_compose_tgi_on_xeon.sh index 47145bae01..0746756f30 100644 --- a/ChatQnA/tests/test_compose_tgi_on_xeon.sh +++ b/ChatQnA/tests/test_compose_tgi_on_xeon.sh @@ -31,7 +31,7 @@ function build_docker_images() { git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep-redis retriever nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu @@ -121,28 +121,28 @@ function validate_microservices() { # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ "dataprep-redis-server" # test /v1/dataprep upload link validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ "dataprep-redis-server" # test /v1/dataprep/get_file validate_service \ - "http://${ip_address}:6007/v1/dataprep/get_file" \ + "http://${ip_address}:6007/v1/dataprep/get" \ '{"name":' \ "dataprep_get" \ "dataprep-redis-server" # test /v1/dataprep/delete_file validate_service \ - "http://${ip_address}:6007/v1/dataprep/delete_file" \ + "http://${ip_address}:6007/v1/dataprep/delete" \ '{"status":true}' \ "dataprep_del" \ "dataprep-redis-server" diff --git a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh index 34e1af18e0..58d3f71c74 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh @@ -20,7 +20,7 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork && git checkout v0.6.4.post2+Gaudi-1.19.0 && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="chatqna-without-rerank chatqna-ui dataprep-redis retriever vllm-gaudi nginx" + service_list="chatqna-without-rerank chatqna-ui dataprep retriever vllm-gaudi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 @@ -105,31 +105,31 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time - # test /v1/dataprep upload file + # test /v1/dataprep/ingest upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ "dataprep-redis-server" - # test /v1/dataprep upload link + # test /v1/dataprep/ingest upload link validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ "dataprep-redis-server" - # test /v1/dataprep/get_file + # test /v1/dataprep/get validate_service \ - "http://${ip_address}:6007/v1/dataprep/get_file" \ + "http://${ip_address}:6007/v1/dataprep/get" \ '{"name":' \ "dataprep_get" \ "dataprep-redis-server" - # test /v1/dataprep/delete_file + # test /v1/dataprep/delete validate_service \ - "http://${ip_address}:6007/v1/dataprep/delete_file" \ + "http://${ip_address}:6007/v1/dataprep/delete" \ '{"status":true}' \ "dataprep_del" \ "dataprep-redis-server" diff --git a/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh b/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh index 7d2858e41a..ebb76eb6ca 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh @@ -20,7 +20,7 @@ function build_docker_images() { git clone https://github.com/vllm-project/vllm.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna-without-rerank chatqna-ui dataprep-redis retriever vllm nginx" + service_list="chatqna-without-rerank chatqna-ui dataprep retriever vllm nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 @@ -104,31 +104,31 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time - # test /v1/dataprep upload file + # test /v1/dataprep/ingest upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ "dataprep-redis-server" - # test /v1/dataprep upload link + # test /v1/dataprep/ingest upload link validate_service \ - "http://${ip_address}:6007/v1/dataprep" \ + "http://${ip_address}:6007/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ "dataprep-redis-server" - # test /v1/dataprep/get_file + # test /v1/dataprep/get validate_service \ - "http://${ip_address}:6007/v1/dataprep/get_file" \ + "http://${ip_address}:6007/v1/dataprep/get" \ '{"name":' \ "dataprep_get" \ "dataprep-redis-server" - # test /v1/dataprep/delete_file + # test /v1/dataprep/delete validate_service \ - "http://${ip_address}:6007/v1/dataprep/delete_file" \ + "http://${ip_address}:6007/v1/dataprep/delete" \ '{"status":true}' \ "dataprep_del" \ "dataprep-redis-server" diff --git a/ChatQnA/tests/test_gmc_on_gaudi.sh b/ChatQnA/tests/test_gmc_on_gaudi.sh index b4a7c482f9..1cbfb02dd3 100755 --- a/ChatQnA/tests/test_gmc_on_gaudi.sh +++ b/ChatQnA/tests/test_gmc_on_gaudi.sh @@ -94,7 +94,7 @@ function validate_chatqna_dataprep() { export CLIENT_POD=$(kubectl get pod -n $CHATQNA_DATAPREP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $CHATQNA_DATAPREP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}") - kubectl exec "$CLIENT_POD" -n $CHATQNA_DATAPREP_NAMESPACE -- curl "$accessUrl/dataprep" -X POST -F 'link_list=["https://raw.githubusercontent.com/opea-project/GenAIInfra/main/microservices-connector/test/data/gaudi.txt"]' -H "Content-Type: multipart/form-data" > $LOG_PATH/curl_dataprep.log + kubectl exec "$CLIENT_POD" -n $CHATQNA_DATAPREP_NAMESPACE -- curl "$accessUrl/dataprep/ingest" -X POST -F 'link_list=["https://raw.githubusercontent.com/opea-project/GenAIInfra/main/microservices-connector/test/data/gaudi.txt"]' -H "Content-Type: multipart/form-data" > $LOG_PATH/curl_dataprep.log exit_code=$? if [ $exit_code -ne 0 ]; then echo "chatqna failed, please check the logs in ${LOG_PATH}!" diff --git a/ChatQnA/tests/test_gmc_on_xeon.sh b/ChatQnA/tests/test_gmc_on_xeon.sh index 97c8ae97f4..473e10d3fe 100755 --- a/ChatQnA/tests/test_gmc_on_xeon.sh +++ b/ChatQnA/tests/test_gmc_on_xeon.sh @@ -96,7 +96,7 @@ function validate_chatqna_dataprep() { export CLIENT_POD=$(kubectl get pod -n $CHATQNA_DATAPREP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $CHATQNA_DATAPREP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}") - kubectl exec "$CLIENT_POD" -n $CHATQNA_DATAPREP_NAMESPACE -- curl "$accessUrl/dataprep" -X POST -F 'link_list=["https://raw.githubusercontent.com/opea-project/GenAIInfra/main/microservices-connector/test/data/gaudi.txt"]' -H "Content-Type: multipart/form-data" > $LOG_PATH/curl_dataprep.log + kubectl exec "$CLIENT_POD" -n $CHATQNA_DATAPREP_NAMESPACE -- curl "$accessUrl/dataprep/ingest" -X POST -F 'link_list=["https://raw.githubusercontent.com/opea-project/GenAIInfra/main/microservices-connector/test/data/gaudi.txt"]' -H "Content-Type: multipart/form-data" > $LOG_PATH/curl_dataprep.log exit_code=$? if [ $exit_code -ne 0 ]; then echo "chatqna failed, please check the logs in ${LOG_PATH}!" 
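Taken together, the test-script changes above converge every ChatQnA test path on the same dataprep routes: `/v1/dataprep/ingest` for both file and link uploads, `/v1/dataprep/get` for listing, and `/v1/dataprep/delete` for removal. For reviewers who want to smoke-test the renamed routes by hand, a minimal sketch follows, assuming a dataprep container is already up and published on `${host_ip}:6007` as in the compose files this patch touches; the payloads mirror the ones used in the hunks above:

```bash
# Ingest a local file (multipart form, field "files"), as in the README hunks.
curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
  -H "Content-Type: multipart/form-data" \
  -F "files=@./nke-10k-2023.pdf"

# Ingest web links through the same route (field "link_list").
curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
  -H "Content-Type: multipart/form-data" \
  -F 'link_list=["https://opea.dev"]'

# List ingested files; the tests above expect a response containing '{"name":'.
curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
  -H "Content-Type: application/json"

# Delete a named file or pass "all"; the tests above expect '{"status":true}'.
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
  -d '{"file_path": "all"}' \
  -H "Content-Type: application/json"
```

A successful ingest returns a body containing "Data preparation succeeded", which is exactly the string the `validate_service` calls grep for.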
diff --git a/ChatQnA/ui/react/.env b/ChatQnA/ui/react/.env index ae0bd3732c..cd1bea7ced 100644 --- a/ChatQnA/ui/react/.env +++ b/ChatQnA/ui/react/.env @@ -1,2 +1,2 @@ VITE_BACKEND_SERVICE_ENDPOINT=http://backend_address:8888/v1/chatqna -VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep +VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep/ingest diff --git a/ChatQnA/ui/svelte/.env b/ChatQnA/ui/svelte/.env index d6f7643b62..28aeea4f7b 100644 --- a/ChatQnA/ui/svelte/.env +++ b/ChatQnA/ui/svelte/.env @@ -1,7 +1,7 @@ CHAT_BASE_URL = '/v1/chatqna' -UPLOAD_FILE_BASE_URL = '/v1/dataprep' +UPLOAD_FILE_BASE_URL = '/v1/dataprep/ingest' -GET_FILE = '/v1/dataprep/get_file' +GET_FILE = '/v1/dataprep/get' -DELETE_FILE = '/v1/dataprep/delete_file' +DELETE_FILE = '/v1/dataprep/delete' diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md index 5699ece356..512f1b160a 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md @@ -27,7 +27,7 @@ DocRetriever are the most widely adopted use case for leveraging the different m - Dataprep Image ```bash - docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/Dockerfile . + docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ## 2. Build Images for MegaService @@ -57,7 +57,7 @@ export RETRIEVER_SERVICE_HOST_IP=${host_ip} export RERANK_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" cd GenAIExamples/DocIndexRetriever/intel/cpu/xeon/ docker compose up -d ``` @@ -78,7 +78,7 @@ docker compose -f compose_without_rerank.yaml up -d Add Knowledge Base via HTTP Links: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]' diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml index 6384312e9b..d4bfe0446f 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml @@ -11,12 +11,12 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - redis-vector-db ports: - - "6007:6007" + - "6007:5000" - "6008:6008" - "6009:6009" environment: diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml index 81baf2da3a..68afbf18e7 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml @@ -11,12 +11,12 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name:
dataprep-redis-server depends_on: - redis-vector-db ports: - - "6007:6007" + - "6007:5000" - "6008:6008" - "6009:6009" environment: diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md index f2de0048a8..433206dfe6 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md @@ -27,7 +27,7 @@ DocRetriever are the most widely adopted use case for leveraging the different m - Dataprep Image ```bash - docker build -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/Dockerfile . + docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ## 2. Build Images for MegaService @@ -57,7 +57,7 @@ export RETRIEVER_SERVICE_HOST_IP=${host_ip} export RERANK_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" export llm_hardware='cpu/xeon' #cpu/xeon, xpu, hpu/gaudi cd GenAIExamples/DocIndexRetriever/intel/hpu/gaudi/ docker compose up -d @@ -68,7 +68,7 @@ docker compose up -d Add Knowledge Base via HTTP Links: ```bash -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]' diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml index a73970f36c..eedbe66719 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml @@ -12,13 +12,13 @@ services: - "6379:6379" - "8001:8001" dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server depends_on: - redis-vector-db - tei-embedding-service ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/DocIndexRetriever/docker_image_build/build.yaml b/DocIndexRetriever/docker_image_build/build.yaml index 4619a9962d..80753e8946 100644 --- a/DocIndexRetriever/docker_image_build/build.yaml +++ b/DocIndexRetriever/docker_image_build/build.yaml @@ -29,9 +29,9 @@ services: dockerfile: comps/rerankings/src/Dockerfile extends: doc-index-retriever image: ${REGISTRY:-opea}/reranking:${TAG:-latest} - dataprep-redis: + dataprep: build: context: GenAIComps - dockerfile: comps/dataprep/redis/langchain/Dockerfile + dockerfile: comps/dataprep/src/Dockerfile extends: doc-index-retriever - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} diff --git a/DocIndexRetriever/tests/test_compose_on_gaudi.sh b/DocIndexRetriever/tests/test_compose_on_gaudi.sh index 813df8836a..2176caf638 100644 --- a/DocIndexRetriever/tests/test_compose_on_gaudi.sh +++ b/DocIndexRetriever/tests/test_compose_on_gaudi.sh @@ -72,7 +72,7 @@ function validate() { function validate_megaservice() { echo "=========Ingest data==================" - local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \ + local CONTENT=$(curl -X POST 
"http://${ip_address}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]') local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-gaudi") diff --git a/DocIndexRetriever/tests/test_compose_on_xeon.sh b/DocIndexRetriever/tests/test_compose_on_xeon.sh index 449e73fd1b..1e490a517d 100644 --- a/DocIndexRetriever/tests/test_compose_on_xeon.sh +++ b/DocIndexRetriever/tests/test_compose_on_xeon.sh @@ -20,7 +20,7 @@ function build_docker_images() { if [ ! -d "GenAIComps" ] ; then git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git fi - service_list="dataprep-redis embedding retriever reranking doc-index-retriever" + service_list="dataprep embedding retriever reranking doc-index-retriever" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 @@ -72,7 +72,7 @@ function validate() { function validate_megaservice() { echo "===========Ingest data==================" - local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \ + local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev/"]') local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon") diff --git a/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh b/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh index 3da96df4ca..ddd62ebd8a 100644 --- a/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh +++ b/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh @@ -20,7 +20,7 @@ function build_docker_images() { if [ ! 
-d "GenAIComps" ] ; then git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git fi - service_list="dataprep-redis embedding retriever doc-index-retriever" + service_list="dataprep embedding retriever doc-index-retriever" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 @@ -66,7 +66,7 @@ function validate() { function validate_megaservice() { echo "===========Ingest data==================" - local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \ + local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev/"]') local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon") diff --git a/FaqGen/faqgen.py b/FaqGen/faqgen.py index 01d1e4acbf..b4c128146d 100644 --- a/FaqGen/faqgen.py +++ b/FaqGen/faqgen.py @@ -58,10 +58,28 @@ def read_text_from_file(file, save_file_name): return file_content +def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): + if self.services[cur_node].service_type == ServiceType.LLM: + for key_to_replace in ["text"]: + if key_to_replace in inputs: + inputs["messages"] = inputs[key_to_replace] + del inputs[key_to_replace] + + if "id" in inputs: + del inputs["id"] + if "max_new_tokens" in inputs: + del inputs["max_new_tokens"] + if "input" in inputs: + del inputs["input"] + + return inputs + + class FaqGenService: def __init__(self, host="0.0.0.0", port=8000): self.host = host self.port = port + ServiceOrchestrator.align_inputs = align_inputs self.megaservice = ServiceOrchestrator() self.endpoint = str(MegaServiceEndpoint.FAQ_GEN) diff --git a/GraphRAG/README.md b/GraphRAG/README.md index d8e4a17ca7..d654357d44 100644 --- a/GraphRAG/README.md +++ b/GraphRAG/README.md @@ -72,7 +72,7 @@ Here is an example of `Nike 2023` pdf. # download pdf file wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf # upload pdf file with dataprep -curl -X POST "http://${host_ip}:6004/v1/dataprep" \ +curl -X POST "http://${host_ip}:6004/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` @@ -172,7 +172,7 @@ Gaudi default compose.yaml | Embedding | Llama-index | Xeon | 6006 | /v1/embaddings | | Retriever | Llama-index, Neo4j | Xeon | 6009 | /v1/retrieval | | LLM | Llama-index, TGI | Gaudi | 6005 | /v1/chat/completions | -| Dataprep | Neo4j, LlamaIndex | Xeon | 6004 | /v1/dataprep | +| Dataprep | Neo4j, LlamaIndex | Xeon | 6004 | /v1/dataprep/ingest | ### Models Selection @@ -207,7 +207,7 @@ Here is an example of `Nike 2023` pdf. 
# download pdf file wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf # upload pdf file with dataprep -curl -X POST "http://${host_ip}:6007/v1/dataprep" \ +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` diff --git a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml index baf7b95a9d..b3801ce181 100644 --- a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml @@ -66,14 +66,14 @@ services: ipc: host command: --model-id ${LLM_MODEL_ID} --max-input-length 6000 --max-total-tokens 8192 dataprep-neo4j-llamaindex: - image: ${REGISTRY:-opea}/dataprep-neo4j-llamaindex:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-neo4j-server depends_on: - neo4j-apoc - tgi-gaudi-service - tei-embedding-service ports: - - "6004:6004" + - "6004:5000" ipc: host environment: no_proxy: ${no_proxy} @@ -85,6 +85,7 @@ services: NEO4J_URL: ${NEO4J_URL} NEO4J_USERNAME: ${NEO4J_USERNAME} NEO4J_PASSWORD: ${NEO4J_PASSWORD} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_NEO4J_LLAMAINDEX" TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} OPENAI_API_KEY: ${OPENAI_API_KEY} @@ -177,7 +178,7 @@ services: - BACKEND_SERVICE_IP=graphrag-gaudi-backend-server - BACKEND_SERVICE_PORT=8888 - DATAPREP_SERVICE_IP=dataprep-neo4j-llamaindex - - DATAPREP_SERVICE_PORT=6004 + - DATAPREP_SERVICE_PORT=5000 ipc: host restart: always networks: diff --git a/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh b/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh index 90e32f2547..41296b936e 100644 --- a/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh @@ -18,6 +18,6 @@ export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" export TGI_LLM_ENDPOINT="http://${host_ip}:6005" export NEO4J_URL="bolt://${host_ip}:7687" export NEO4J_USERNAME=neo4j -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004/v1/dataprep" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest" export LOGFLAG=True export RETRIEVER_SERVICE_PORT=80 diff --git a/GraphRAG/docker_image_build/build.yaml b/GraphRAG/docker_image_build/build.yaml index 870b15a674..2f2b424a74 100644 --- a/GraphRAG/docker_image_build/build.yaml +++ b/GraphRAG/docker_image_build/build.yaml @@ -20,15 +20,15 @@ services: context: GenAIComps dockerfile: comps/retrievers/src/Dockerfile image: ${REGISTRY:-opea}/retriever:${TAG:-latest} - dataprep-neo4j-llamaindex: + dataprep: build: args: http_proxy: ${http_proxy} https_proxy: ${https_proxy} no_proxy: ${no_proxy} context: GenAIComps - dockerfile: comps/dataprep/neo4j/llama_index/Dockerfile - image: ${REGISTRY:-opea}/dataprep-neo4j-llamaindex:${TAG:-latest} + dockerfile: comps/dataprep/src/Dockerfile + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} nginx: build: args: diff --git a/GraphRAG/tests/test_compose_on_gaudi.sh b/GraphRAG/tests/test_compose_on_gaudi.sh index 10d118fb87..409b498f38 100755 --- a/GraphRAG/tests/test_compose_on_gaudi.sh +++ b/GraphRAG/tests/test_compose_on_gaudi.sh @@ -129,10 +129,10 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time - # test /v1/dataprep graph extraction + # test /v1/dataprep/ingest graph extraction echo "Like many companies in the O&G sector, the stock 
of Chevron (NYSE:CVX) has declined about 10% over the past 90-days despite the fact that Q2 consensus earnings estimates have risen sharply (~25%) during that same time frame. Over the years, Chevron has kept a very strong balance sheet. FirstEnergy (NYSE:FE – Get Rating) posted its earnings results on Tuesday. The utilities provider reported $0.53 earnings per share for the quarter, topping the consensus estimate of $0.52 by $0.01, RTT News reports. FirstEnergy had a net margin of 10.85% and a return on equity of 17.17%. The Dáil was almost suspended on Thursday afternoon after Sinn Féin TD John Brady walked across the chamber and placed an on-call pager in front of the Minister for Housing Darragh O’Brien during a debate on retained firefighters. Mr O’Brien said Mr Brady had taken part in an act of theatre that was obviously choreographed.Around 2,000 retained firefighters around the country staged a second day of industrial action on Tuesday and are due to start all out-strike action from next Tuesday. The mostly part-time workers, who keep the services going outside of Ireland’s larger urban centres, are taking industrial action in a dispute over pay and working conditions. Speaking in the Dáil, Sinn Féin deputy leader Pearse Doherty said firefighters had marched on Leinster House today and were very angry at the fact the Government will not intervene. Reintroduction of tax relief on mortgages needs to be considered, O’Brien says. Martin withdraws comment after saying People Before Profit would ‘put the jackboot on people’ Taoiseach ‘propagated fears’ farmers forced to rewet land due to nature restoration law – Cairns An intervention is required now. I’m asking you to make an improved offer in relation to pay for retained firefighters, Mr Doherty told the housing minister.I’m also asking you, and challenging you, to go outside after this Order of Business and meet with the firefighters because they are just fed up to the hilt in relation to what you said.Some of them have handed in their pagers to members of the Opposition and have challenged you to wear the pager for the next number of weeks, put up with an €8,600 retainer and not leave your community for the two and a half kilometres and see how you can stand over those type of pay and conditions. At this point, Mr Brady got up from his seat, walked across the chamber and placed the pager on the desk in front of Mr O’Brien. Ceann Comhairle Seán Ó Fearghaíl said the Sinn Féin TD was completely out of order and told him not to carry out a charade in this House, adding it was absolutely outrageous behaviour and not to be encouraged.Mr O’Brien said Mr Brady had engaged in an act of theatre here today which was obviously choreographed and was then interrupted with shouts from the Opposition benches. Mr Ó Fearghaíl said he would suspend the House if this racket continues.Mr O’Brien later said he said he was confident the dispute could be resolved and he had immense regard for firefighters. The minister said he would encourage the unions to re-engage with the State’s industrial relations process while also accusing Sinn Féin of using the issue for their own political gain." 
> $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:6004/v1/dataprep" \ + "http://${ip_address}:6004/v1/dataprep/ingest" \ "Data preparation succeeded" \ "extract_graph_neo4j" \ "dataprep-neo4j-server" diff --git a/GraphRAG/ui/react/.env b/GraphRAG/ui/react/.env index ae0bd3732c..cd1bea7ced 100644 --- a/GraphRAG/ui/react/.env +++ b/GraphRAG/ui/react/.env @@ -1,2 +1,2 @@ VITE_BACKEND_SERVICE_ENDPOINT=http://backend_address:8888/v1/chatqna -VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep +VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep/ingest diff --git a/GraphRAG/ui/svelte/.env b/GraphRAG/ui/svelte/.env index 36c251a827..cb8cc7e120 100644 --- a/GraphRAG/ui/svelte/.env +++ b/GraphRAG/ui/svelte/.env @@ -1,7 +1,7 @@ CHAT_BASE_URL = '/v1/graphrag' -UPLOAD_FILE_BASE_URL = '/v1/dataprep' +UPLOAD_FILE_BASE_URL = '/v1/dataprep/ingest' -GET_FILE = '/v1/dataprep/get_file' +GET_FILE = '/v1/dataprep/get' -DELETE_FILE = '/v1/dataprep/delete_file' +DELETE_FILE = '/v1/dataprep/delete' diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md b/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md index af0812d84d..e49b264823 100644 --- a/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md +++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md @@ -51,7 +51,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_ ### 4. Build dataprep-multimodal-redis Image ```bash -docker build --no-cache -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 5. Build MegaService Docker Image @@ -83,7 +83,7 @@ docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm Then run the command `docker images`, you will have the following 8 Docker Images: -1. `opea/dataprep-multimodal-redis:latest` +1. `opea/dataprep:latest` 2. `ghcr.io/huggingface/text-generation-inference:2.4.1-rocm` 3. `opea/lvm:latest` 4. 
`opea/retriever:latest` diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/compose.yaml b/MultimodalQnA/docker_compose/amd/gpu/rocm/compose.yaml index e38f175f94..af4855bb59 100644 --- a/MultimodalQnA/docker_compose/amd/gpu/rocm/compose.yaml +++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/compose.yaml @@ -20,13 +20,13 @@ services: - "6379:6379" - "8001:8001" dataprep-multimodal-redis: - image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-multimodal-redis depends_on: - redis-vector-db - lvm ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -36,6 +36,8 @@ services: INDEX_NAME: ${INDEX_NAME} LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm" HUGGINGFACEHUB_API_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN} + MULTIMODAL_DATAPREP: true + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS" restart: unless-stopped embedding-multimodal-bridgetower: image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest} diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env.sh b/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env.sh index 0102769375..c271a4b553 100644 --- a/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -26,8 +26,8 @@ export MM_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} export LVM_SERVICE_HOST_IP=${HOST_IP} export MEGA_SERVICE_HOST_IP=${HOST_IP} export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:8888/v1/multimodalqna" -export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/ingest_with_text" -export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/generate_transcripts" -export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/generate_captions" -export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get_files" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete_files" +export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/ingest" +export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/generate_transcripts" +export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/generate_captions" +export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/get" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/delete" diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/README.md b/MultimodalQnA/docker_compose/intel/cpu/xeon/README.md index 5a72491c32..6ce1174782 100644 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/README.md +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/README.md @@ -94,11 +94,11 @@ export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" -export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" -export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" -export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" +export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest" +export 
DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts" +export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete" ``` Note: Please replace `host_ip` with your external IP address; do not use localhost. @@ -144,7 +144,7 @@ docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy ### 4. Build dataprep-multimodal-redis Image ```bash -docker build --no-cache -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 5. Build asr images @@ -178,7 +178,7 @@ cd ../../../ Then run the command `docker images`, you will have the following 11 Docker Images: -1. `opea/dataprep-multimodal-redis:latest` +1. `opea/dataprep:latest` 2. `opea/lvm:latest` 3. `opea/lvm-llava:latest` 4. `opea/retriever:latest` diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml index 48c40f3bb3..0c1557a58d 100644 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -20,13 +20,13 @@ services: - "6379:6379" - "8001:8001" dataprep-multimodal-redis: - image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-multimodal-redis depends_on: - redis-vector-db - lvm-llava ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -36,6 +36,8 @@ services: INDEX_NAME: ${INDEX_NAME} LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm" HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + MULTIMODAL_DATAPREP: true + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS" restart: unless-stopped embedding-multimodal-bridgetower: image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest} diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh b/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh index 5b41e456ca..483f4d1ed2 100755 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -28,8 +28,8 @@ export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" -export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" -export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" -export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" +export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest" +export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts" +export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions" +export
DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete" diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md b/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md index 598797b74f..4519266098 100644 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md @@ -43,11 +43,11 @@ export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr" export LVM_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" -export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" -export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" -export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" +export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest" +export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts" +export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete" ``` Note: Please replace `host_ip` with your external IP address; do not use localhost. @@ -95,7 +95,7 @@ docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy ### 4. Build dataprep-multimodal-redis Image ```bash -docker build --no-cache -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 5. Build asr images @@ -127,7 +127,7 @@ docker build --no-cache -t opea/multimodalqna-ui:latest --build-arg https_proxy= Then run the command `docker images`, you will have the following 11 Docker Images: -1. `opea/dataprep-multimodal-redis:latest` +1. `opea/dataprep:latest` 2. `opea/lvm:latest` 3. `ghcr.io/huggingface/tgi-gaudi:2.0.6` 4.
`opea/retriever:latest` diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 2a134a548b..0602a8d338 100644 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -20,13 +20,13 @@ services: https_proxy: ${https_proxy} restart: unless-stopped dataprep-multimodal-redis: - image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-multimodal-redis depends_on: - redis-vector-db - lvm ports: - - "6007:6007" + - "6007:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -36,6 +36,8 @@ services: INDEX_NAME: ${INDEX_NAME} LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm" HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + MULTIMODAL_DATAPREP: true + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS" restart: unless-stopped embedding-multimodal-bridgetower: image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest} diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh index 8fb00423f7..98cabece48 100755 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -28,8 +28,8 @@ export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" -export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" -export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" -export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" +export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest" +export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts" +export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete" diff --git a/MultimodalQnA/docker_image_build/build.yaml b/MultimodalQnA/docker_image_build/build.yaml index 9c26d99d8e..2a8fecb2a1 100644 --- a/MultimodalQnA/docker_image_build/build.yaml +++ b/MultimodalQnA/docker_image_build/build.yaml @@ -47,12 +47,12 @@ services: dockerfile: comps/lvms/src/Dockerfile extends: multimodalqna image: ${REGISTRY:-opea}/lvm:${TAG:-latest} - dataprep-multimodal-redis: + dataprep: build: context: GenAIComps - dockerfile: comps/dataprep/multimodal/redis/langchain/Dockerfile + dockerfile: comps/dataprep/src/Dockerfile extends: multimodalqna - image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} whisper: build: context: GenAIComps diff --git a/MultimodalQnA/tests/test_compose_on_gaudi.sh b/MultimodalQnA/tests/test_compose_on_gaudi.sh index 8fd99b0dbc..e8491e9c9c 100644 --- a/MultimodalQnA/tests/test_compose_on_gaudi.sh +++ b/MultimodalQnA/tests/test_compose_on_gaudi.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # 
SPDX-License-Identifier: Apache-2.0 -set -e +set -x IMAGE_REPO=${IMAGE_REPO:-"opea"} IMAGE_TAG=${IMAGE_TAG:-"latest"} echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" @@ -35,7 +35,7 @@ function build_docker_images() { git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm dataprep-multimodal-redis whisper" + service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm dataprep whisper" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 @@ -64,11 +64,11 @@ function setup_env() { export LVM_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" - export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" - export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" - export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" - export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" + export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest" + export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts" + export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions" + export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete" } function start_services() { @@ -109,7 +109,7 @@ function validate_service() { elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then - HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "apple.txt"}' -H 'Content-Type: application/json' "$URL") else HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") fi @@ -173,6 +173,11 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time + export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" + export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/generate_transcripts" + export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/generate_captions" + export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get" + # test data prep echo "Data Prep with Generating Transcript for Video" validate_service \ @@ -283,6 +288,7 @@ function validate_megaservice() { function validate_delete { echo "Validate data prep delete files" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete" validate_service \ "${DATAPREP_DELETE_FILE_ENDPOINT}" \ '{"status":true}' \ diff --git a/MultimodalQnA/tests/test_compose_on_rocm.sh 
b/MultimodalQnA/tests/test_compose_on_rocm.sh index 8b29e44672..2072040aa3 100644 --- a/MultimodalQnA/tests/test_compose_on_rocm.sh +++ b/MultimodalQnA/tests/test_compose_on_rocm.sh @@ -35,7 +35,7 @@ function build_docker_images() { git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm dataprep-multimodal-redis whisper" + service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm dataprep whisper" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1m @@ -68,11 +68,11 @@ function setup_env() { export LVM_SERVICE_HOST_IP=${HOST_IP} export MEGA_SERVICE_HOST_IP=${HOST_IP} export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:8888/v1/multimodalqna" - export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/ingest_with_text" - export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/generate_transcripts" - export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/generate_captions" - export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get_files" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete_files" + export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/ingest" + export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/generate_transcripts" + export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/generate_captions" + export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/get" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/delete" } function start_services() { @@ -111,7 +111,7 @@ function validate_service() { elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then - HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "apple.txt"}' -H 'Content-Type: application/json' "$URL") else HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") fi @@ -175,6 +175,11 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time + export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/ingest" + export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_transcripts" + export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_captions" + export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get" + # test data prep echo "Data Prep with Generating Transcript for Video" validate_service \ @@ -284,6 +289,7 @@ function validate_megaservice() { function validate_delete { echo "Validate data prep delete files" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete" validate_service \ "${DATAPREP_DELETE_FILE_ENDPOINT}" \ '{"status":true}' \ diff --git a/MultimodalQnA/tests/test_compose_on_xeon.sh 
b/MultimodalQnA/tests/test_compose_on_xeon.sh index 818efe4e05..e40cb3c8d7 100644 --- a/MultimodalQnA/tests/test_compose_on_xeon.sh +++ b/MultimodalQnA/tests/test_compose_on_xeon.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -e +set -x IMAGE_REPO=${IMAGE_REPO:-"opea"} IMAGE_TAG=${IMAGE_TAG:-"latest"} echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" @@ -35,7 +35,7 @@ function build_docker_images() { git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm-llava lvm dataprep-multimodal-redis whisper" + service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm-llava lvm dataprep whisper" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s } @@ -61,11 +61,11 @@ function setup_env() { export LVM_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" - export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" - export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" - export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" - export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" + export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest" + export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts" + export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions" + export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete" } function start_services() { @@ -103,7 +103,7 @@ function validate_service() { elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then - HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "apple.txt"}' -H 'Content-Type: application/json' "$URL") else HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") fi @@ -167,6 +167,11 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time + export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" + export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/generate_transcripts" + export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/generate_captions" + export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get" + # test data prep echo "Data Prep with Generating Transcript for Video" validate_service \ @@ -276,6 +281,7 @@ function validate_megaservice() { function validate_delete { echo "Validate data 
prep delete files" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete" validate_service \ "${DATAPREP_DELETE_FILE_ENDPOINT}" \ '{"status":true}' \ diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md index 75ffa4483e..5ab4816096 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md @@ -39,7 +39,7 @@ docker build --no-cache -t opea/llm-textgen:latest --build-arg https_proxy=$http ### 5. Build Dataprep Image ```bash -docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . ``` ### 6. Build Prompt Registry Image @@ -158,12 +158,12 @@ export TGI_LLM_ENDPOINT_CODEGEN="http://${host_ip}:8028" export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009" export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009" export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete" export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen" export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get" export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete" @@ -347,7 +347,7 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det Update Knowledge Base via Local File Upload: ```bash - curl -X POST "http://${host_ip}:6007/v1/dataprep" \ + curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F "files=@./nke-10k-2023.pdf" ``` @@ -357,7 +357,7 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det Add Knowledge Base via HTTP Links: ```bash - curl -X POST "http://${host_ip}:6007/v1/dataprep" \ + curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]' ``` @@ -367,7 +367,7 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det Also, you are able to get the file list that you uploaded: ```bash - curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \ + curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \ -H "Content-Type: application/json" ``` @@ -375,17 +375,17 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det ```bash # delete link - curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ + curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ -d '{"file_path": "https://opea.dev.txt"}' \ -H "Content-Type: application/json" # delete file - 
diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
index 1872f12923..ee7d23a640 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -15,12 +15,12 @@ services:
       - "6379:6379"
       - "8001:8001"
   dataprep-redis-service:
-    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
+    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
     container_name: dataprep-redis-server
     depends_on:
       - redis-vector-db
     ports:
-      - "6007:6007"
+      - "6007:5000"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh
index c1c8af918d..9ea771239d 100644
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh
@@ -32,12 +32,12 @@ export TGI_LLM_ENDPOINT_CODEGEN="http://${host_ip}:8028"
 export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009"
 export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009"
 export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna"
-export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
+export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"
 export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen"
 export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen"
 export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
 export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
 export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
 export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete"
diff --git a/ProductivitySuite/docker_image_build/build.yaml b/ProductivitySuite/docker_image_build/build.yaml
index dd8da57399..807aa1242c 100644
--- a/ProductivitySuite/docker_image_build/build.yaml
+++ b/ProductivitySuite/docker_image_build/build.yaml
@@ -35,12 +35,12 @@ services:
       dockerfile: comps/llms/src/text-generation/Dockerfile
     extends: chatqna
     image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
-  dataprep-redis:
+  dataprep:
     build:
       context: GenAIComps
-      dockerfile: comps/dataprep/redis/langchain/Dockerfile
+      dockerfile: comps/dataprep/src/Dockerfile
     extends: chatqna
-    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
+    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
   promptregistry-mongo-server:
     build:
       context: GenAIComps
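Note the asymmetry the compose and set_env hunks introduce: the consolidated `opea/dataprep` image serves on port 5000 inside the container, while the host keeps publishing 6007. Host-side clients are unaffected, but anything resolving the service over the compose network must now target 5000. A sketch of the two access paths, assuming the service definition above (the sibling container name is a placeholder):

```bash
# From the host: the published port is unchanged.
curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
    -H "Content-Type: application/json"

# From a sibling container on the same compose network: use the
# compose service name and the in-container port, 5000.
docker exec <sibling-container> \
    curl -X POST "http://dataprep-redis-service:5000/v1/dataprep/get" \
    -H "Content-Type: application/json"
```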
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
index c921efea55..af2a156e60 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
@@ -499,11 +499,11 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "opea/dataprep-redis:latest"
+          image: "opea/dataprep:latest"
           imagePullPolicy: IfNotPresent
           ports:
             - name: data-prep
-              containerPort: 6007
+              containerPort: 5000
               protocol: TCP
           volumeMounts:
             - mountPath: /tmp
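The Kubernetes manifest gets the same container-port move, so any Service, probe, or client manifest still pointing at 6007 will stop working once this image lands. A quick way to confirm the rewired port from kubectl, assuming access to the deployed namespace (the deployment name is illustrative):

```bash
# Inspect the declared container port on the dataprep deployment.
kubectl get deploy <dataprep-deployment> \
    -o jsonpath='{.spec.template.spec.containers[0].ports[0].containerPort}'

# Port-forward straight to the pod port and probe the API.
kubectl port-forward deploy/<dataprep-deployment> 5000:5000 &
curl -X POST "http://localhost:5000/v1/dataprep/get" \
    -H "Content-Type: application/json"
```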
diff --git a/ProductivitySuite/tests/test_compose_on_xeon.sh b/ProductivitySuite/tests/test_compose_on_xeon.sh
index fa0b6e2a4a..ce85a75cb2 100755
--- a/ProductivitySuite/tests/test_compose_on_xeon.sh
+++ b/ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -58,10 +58,10 @@ function start_services() {
     export TGI_LLM_ENDPOINT_DOCSUM="http://${ip_address}:9009"
     export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${ip_address}:8888/v1/chatqna"
     export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${ip_address}:8889/v1/faqgen"
-    export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6009/v1/dataprep/delete_file"
+    export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:5000/v1/dataprep/delete"
     export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${ip_address}:7778/v1/codegen"
-    export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/ingest"
-    export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get"
+    export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:5000/v1/dataprep/ingest"
+    export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:5000/v1/dataprep/get"
     export CHAT_HISTORY_CREATE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/create"
     export CHAT_HISTORY_CREATE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/create"
     export CHAT_HISTORY_DELETE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/delete"
@@ -146,6 +146,34 @@ function validate_service() {
     sleep 1s
 }
 
+
+function validate_faqgen() {
+    local URL="$1"
+    local SERVICE_NAME="$2"
+    local DOCKER_NAME="$3"
+    local EXPECTED_RESULT="Embeddings"
+    local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
+    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F "max_tokens=32" -F "stream=False" -H 'Content-Type: multipart/form-data' "$URL")
+    if [ "$HTTP_STATUS" -eq 200 ]; then
+        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+
+        local CONTENT=$(curl -s -X POST -F "$INPUT_DATA" -F "max_tokens=32" -F "stream=False" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
+
+        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
+            echo "[ $SERVICE_NAME ] Content is as expected."
+        else
+            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
+            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+            exit 1
+        fi
+    else
+        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
+        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+        exit 1
+    fi
+    sleep 1s
+}
+
 function validate_microservices() {
     # Check if the microservices are running correctly.
@@ -167,31 +195,31 @@ function validate_microservices() {
     sleep 1m # retrieval can't curl as expected, try to wait for more time
 
-    # test /v1/dataprep/delete_file
+    # test /v1/dataprep/delete
     validate_service \
-        "http://${ip_address}:6007/v1/dataprep/delete_file" \
+        "http://${ip_address}:6007/v1/dataprep/delete" \
        '{"status":true}' \
        "dataprep_del" \
        "dataprep-redis-server"
 
-    # test /v1/dataprep upload file
+    # test /v1/dataprep/ingest upload file
     echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
     validate_service \
-        "http://${ip_address}:6007/v1/dataprep" \
+        "http://${ip_address}:6007/v1/dataprep/ingest" \
        "Data preparation succeeded" \
        "dataprep_upload_file" \
        "dataprep-redis-server"
 
     # test /v1/dataprep upload link
     validate_service \
-        "http://${ip_address}:6007/v1/dataprep" \
+        "http://${ip_address}:6007/v1/dataprep/ingest" \
        "Data preparation succeeded" \
        "dataprep_upload_link" \
        "dataprep-redis-server"
 
-    # test /v1/dataprep/get_file
+    # test /v1/dataprep/get
     validate_service \
-        "http://${ip_address}:6007/v1/dataprep/get_file" \
+        "http://${ip_address}:6007/v1/dataprep/get" \
        '{"name":' \
        "dataprep_get" \
        "dataprep-redis-server"
@@ -238,12 +266,10 @@ function validate_microservices() {
        '{"query":"What is Deep Learning?"}'
 
     # FAQGen llm microservice
-    validate_service \
+    validate_faqgen \
        "${ip_address}:9002/v1/faqgen" \
-        "data: " \
        "llm_faqgen" \
-        "llm-faqgen-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        "llm-faqgen-server"
 
     # CodeGen llm microservice
     validate_service \
@@ -287,10 +313,8 @@ function validate_microservices() {
 }
 
-
 function validate_megaservice() {
-    # Curl the ChatQnAMega Service
     validate_service \
        "${ip_address}:8888/v1/chatqna" \
@@ -300,12 +324,10 @@ function validate_megaservice() {
        '{"messages": "What is the revenue of Nike in 2023?"}'\
 
     # Curl the FAQGen Service
-    validate_service \
+    validate_faqgen \
        "${ip_address}:8889/v1/faqgen" \
-        "Text Embeddings Inference" \
-        "faqgen-xeon-backend-server" \
        "faqgen-xeon-backend-server" \
-        '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'\
+        "faqgen-xeon-backend-server"
 
     # Curl the CodeGen Mega Service
     validate_service \
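The new `validate_faqgen` helper exists because the FAQGen endpoints now take multipart form fields (`messages`, `max_tokens`, `stream`) instead of the JSON body the replaced `validate_service` calls were sending. For manual debugging, the equivalent standalone request, mirroring the function's curl and assuming `ip_address` is exported, looks like:

```bash
# POST form fields rather than a JSON payload to a FAQGen endpoint.
curl -X POST "http://${ip_address}:9002/v1/faqgen" \
    -H 'Content-Type: multipart/form-data' \
    -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models." \
    -F "max_tokens=32" \
    -F "stream=False"
```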
diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/README.md b/VideoQnA/docker_compose/intel/cpu/xeon/README.md
index 6c5af3d84f..478f99fe38 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/README.md
@@ -80,7 +80,7 @@ docker build -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg
 ### 5. Build Dataprep Image
 
 ```bash
-docker build -t opea/dataprep-multimodal-vdms:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/multimodal_langchain/Dockerfile .
+docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
 ```
 
 ### 6. Build MegaService Docker Image
@@ -106,7 +106,7 @@ docker build -t opea/videoqna-ui:latest --build-arg https_proxy=$https_proxy --b
 Then run the command `docker images`, you will have the following 8 Docker Images:
 
-1. `opea/dataprep-multimodal-vdms:latest`
+1. `opea/dataprep:latest`
 2. `opea/embedding-multimodal-clip:latest`
 3. `opea/retriever:latest`
 4. `opea/reranking:latest`
@@ -161,8 +161,8 @@ export LVM_SERVICE_HOST_IP=${host_ip}
 export LVM_ENDPOINT="http://${host_ip}:9009"
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna"
 export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
 export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
 
 export VDMS_HOST=${host_ip}
@@ -195,7 +195,7 @@ docker compose up vdms-vector-db dataprep -d
 sleep 1m # wait for the services ready
 
 # Insert some sample data to the DB
-curl -X POST http://${host_ip}:6007/v1/dataprep \
+curl -X POST http://${host_ip}:6007/v1/dataprep/ingest \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./data/op_1_0320241830.mp4"
diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
index f52ceef414..780ff3c704 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -10,12 +10,12 @@ services:
     ports:
       - "8001:55555"
   dataprep:
-    image: ${REGISTRY:-opea}/dataprep-multimodal-vdms:${TAG:-latest}
+    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
     container_name: dataprep-vdms-server
     depends_on:
       - vdms-vector-db
     ports:
-      - "6007:6007"
+      - "6007:5000"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
@@ -23,6 +23,7 @@ services:
       VDMS_HOST: ${VDMS_HOST}
       VDMS_PORT: ${VDMS_PORT}
       INDEX_NAME: ${INDEX_NAME}
+      MULTIMODAL_DATAPREP: true
     entrypoint: sh -c 'sleep 15 && python ingest_videos.py'
     volumes:
       - /home/$USER/.cache/clip:/home/user/.cache/clip
diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
index 200ed77e22..dcf574774b 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -17,8 +17,8 @@ export LVM_SERVICE_HOST_IP=${host_ip}
 export LVM_ENDPOINT="http://${host_ip}:9009"
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna"
 export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
 export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
 
 export VDMS_HOST=${host_ip}
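With the dedicated `opea/dataprep-multimodal-vdms` image gone, VideoQnA appears to select the multimodal VDMS pipeline inside the generic image via the new `MULTIMODAL_DATAPREP: true` variable. A standalone-run sketch under that assumption, mirroring the compose service above (values are illustrative; the compose file remains the supported path):

```bash
# Run the consolidated dataprep image in multimodal VDMS mode.
# Env and port mapping mirror the compose service; flag semantics assumed
# from the MULTIMODAL_DATAPREP addition above.
docker run -d --name dataprep-vdms-server \
    -p 6007:5000 \
    -e VDMS_HOST=${VDMS_HOST} \
    -e VDMS_PORT=${VDMS_PORT} \
    -e INDEX_NAME=${INDEX_NAME} \
    -e MULTIMODAL_DATAPREP=true \
    opea/dataprep:latest
```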
diff --git a/VideoQnA/docker_image_build/build.yaml b/VideoQnA/docker_image_build/build.yaml
index 8f000f7295..9ed0bee955 100644
--- a/VideoQnA/docker_image_build/build.yaml
+++ b/VideoQnA/docker_image_build/build.yaml
@@ -17,12 +17,12 @@ services:
       dockerfile: ./docker/Dockerfile
     extends: videoqna
     image: ${REGISTRY:-opea}/videoqna-ui:${TAG:-latest}
-  dataprep-multimodal-vdms:
+  dataprep:
     build:
       context: GenAIComps
-      dockerfile: comps/dataprep/vdms/multimodal_langchain/Dockerfile
+      dockerfile: comps/dataprep/src/Dockerfile
     extends: videoqna
-    image: ${REGISTRY:-opea}/dataprep-multimodal-vdms:${TAG:-latest}
+    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
   embedding-multimodal-clip:
     build:
       context: GenAIComps
diff --git a/VideoQnA/tests/test_compose_on_xeon.sh b/VideoQnA/tests/test_compose_on_xeon.sh
index 96c641fe04..f8d703027e 100755
--- a/VideoQnA/tests/test_compose_on_xeon.sh
+++ b/VideoQnA/tests/test_compose_on_xeon.sh
@@ -35,7 +35,7 @@ function start_services() {
     sleep 30s
 
     # Insert some sample data to the DB
-    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep \
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
        -H "Content-Type: multipart/form-data" \
        -F "files=@./data/op_1_0320241830.mp4")
 
@@ -142,7 +142,7 @@ function validate_microservices() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon//data
 
     # dataprep microservice
-    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep \
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
        -H "Content-Type: multipart/form-data" \
        -F "files=@./op_1_0320241830.mp4")
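Both VideoQnA test hunks lean on curl's `-w "%{http_code}"` to turn ingestion into a pass/fail step. The same pattern doubles as a readiness loop when the fixed `sleep 30s` is not enough; a sketch assuming the stack above:

```bash
# Retry the ingest route until the service answers 200, then stop.
for i in $(seq 1 10); do
    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST "http://${ip_address}:6007/v1/dataprep/ingest" \
        -H "Content-Type: multipart/form-data" \
        -F "files=@./data/op_1_0320241830.mp4")
    [ "$HTTP_STATUS" -eq 200 ] && break
    sleep 10
done
```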
diff --git a/docker_images_list.md b/docker_images_list.md
index e487fef948..b3f62527b5 100644
--- a/docker_images_list.md
+++ b/docker_images_list.md
@@ -2,7 +2,7 @@
 
 A list of released OPEA docker images in https://hub.docker.com/, contains all relevant images from the GenAIExamples, GenAIComps and GenAIInfra projects. Please expect more public available images in the future release.
 
-Take ChatQnA for example. ChatQnA is a chatbot application service based on the Retrieval Augmented Generation (RAG) architecture. It consists of [opea/embedding](https://hub.docker.com/r/opea/embedding), [opea/retriever](https://hub.docker.com/r/opea/retriever-redis), [opea/reranking-tei](https://hub.docker.com/r/opea/reranking-tei), [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen), [opea/dataprep-redis](https://hub.docker.com/r/opea/dataprep-redis), [opea/chatqna](https://hub.docker.com/r/opea/chatqna), [opea/chatqna-ui](https://hub.docker.com/r/opea/chatqna-ui) and [opea/chatqna-conversation-ui](https://hub.docker.com/r/opea/chatqna-conversation-ui) (Optional) multiple microservices. Other services are similar, see the corresponding README for details.
+Take ChatQnA for example. ChatQnA is a chatbot application service based on the Retrieval Augmented Generation (RAG) architecture. It consists of [opea/embedding](https://hub.docker.com/r/opea/embedding), [opea/retriever](https://hub.docker.com/r/opea/retriever-redis), [opea/reranking-tei](https://hub.docker.com/r/opea/reranking-tei), [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen), [opea/dataprep](), [opea/chatqna](https://hub.docker.com/r/opea/chatqna), [opea/chatqna-ui](https://hub.docker.com/r/opea/chatqna-ui) and [opea/chatqna-conversation-ui](https://hub.docker.com/r/opea/chatqna-conversation-ui) (Optional) multiple microservices. Other services are similar, see the corresponding README for details.
 
 ## Example images
@@ -45,17 +45,7 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
 | [opea/agent]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/agent/src/Dockerfile) | The docker image exposed the OPEA agent microservice for GenAI application use |
 | [opea/asr](https://hub.docker.com/r/opea/asr) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/Dockerfile) | The docker image exposed the OPEA Audio-Speech-Recognition microservice for GenAI application use |
 | [opea/chathistory-mongo-server](https://hub.docker.com/r/opea/chathistory-mongo-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/chathistory/src/Dockerfile) | The docker image exposes OPEA Chat History microservice which based on MongoDB database, designed to allow user to store, retrieve and manage chat conversations |
-| [opea/dataprep-milvus](https://hub.docker.com/r/opea/dataprep-milvus) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/milvus/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on milvus vectordb for GenAI application use |
-| [opea/dataprep-multimodal-vdms](https://hub.docker.com/r/opea/dataprep-multimodal-vdms) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/vdms/multimodal_langchain/Dockerfile) | This docker image exposes an OPEA dataprep microservice based on a multi-modal VDMS for use by GenAI applications. |
-| [opea/dataprep-multimodal-redis](https://hub.docker.com/r/opea/dataprep-multimodal-redis) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/redis/langchain/Dockerfile) | This docker image exposes an OPEA dataprep microservice based on a multi-modal redis for use by GenAI applications. |
-| [opea/dataprep-on-ray-redis](https://hub.docker.com/r/opea/dataprep-on-ray-redis) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/redis/langchain_ray/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on redis vectordb and optimized ray for GenAI application use |
-| [opea/dataprep-pgvector](https://hub.docker.com/r/opea/dataprep-pgvector) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/pgvector/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on pgvector vectordb for GenAI application use |
-| [opea/dataprep-pinecone](https://hub.docker.com/r/opea/dataprep-pinecone) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/pinecone/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on pincone vectordb for GenAI application use |
-| [opea/dataprep-qdrant](https://hub.docker.com/r/opea/dataprep-qdrant) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/qdrant/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on qdrant vectordb for GenAI application use |
-| [opea/dataprep-redis](https://hub.docker.com/r/opea/dataprep-redis) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/redis/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on redis vectordb Langchain framework for GenAI application use |
-| [opea/dataprep-redis-llama-index](https://hub.docker.com/r/opea/dataprep-redis-llama-index) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/redis/llama_index/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on redis vectordb LlamaIndex framework for GenAI application use |
-| [opea/dataprep-vdms](https://hub.docker.com/r/opea/dataprep-vdms) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/vdms/langchain/Dockerfile) | This docker image exposes an OPEA dataprep microservice based on VDMS vectordb for use by GenAI applications. |
-| [opea/embedding-langchain-mosec](https://hub.docker.com/r/opea/embedding-langchain-mosec) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/nginx/src/Dockerfile) | The docker image exposed the OPEA mosec embedding microservice base on Langchain framework for GenAI application use |
+| [opea/dataprep]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/src/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on many vectordbs for GenAI application use |
 | [opea/embedding-multimodal-clip](https://hub.docker.com/r/opea/embedding-multimodal-clip) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/clip/src/Dockerfile) | The docker image exposes OPEA multimodal CLIP-based embedded microservices for use by GenAI applications |
 | [opea/embedding](https://hub.docker.com/r/opea/embedding) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/embeddings/src/Dockerfile) | The docker image exposes OPEA multimodal embedded microservices for use by GenAI applications |
 | [opea/embedding-multimodal-bridgetower](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile) | The docker image exposes OPEA multimodal embedded microservices based on bridgetower for use by GenAI applications |