You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I have searched the existing issues and this bug is not already filed.
My model is hosted on OpenAI or Azure. If not, please look at the "model providers" issue and don't file a new one here.
I believe this is a legitimate bug, not just a question. If this is a question, please use the Discussions area.
Describe the issue
I copied the code from https://github.com/win4r/GraphRAG4OpenWebUI to my local machine. The project uses GraphRAG version 0.3.3, but I have the latest version, 1.2.0, installed. The code throws an error: cannot import name 'store_entity_semantic_embeddings' from 'graphrag.query.input.loaders.dfs'. How can I resolve this? Here is the code:
import os
import asyncio
import time
import uuid
import json
import re
import pandas as pd
import tiktoken
import logging
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any, Union
from contextlib import asynccontextmanager
from tavily import TavilyClient
# GraphRAG-related imports
from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
read_indexer_covariates,
read_indexer_entities,
read_indexer_relationships,
read_indexer_reports,
read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import store_entity_semantic_embeddings
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import LocalSearchMixedContext
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.query.structured_search.global_search.community_context import GlobalCommunityContext
from graphrag.query.structured_search.global_search.search import GlobalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore
formatted_paragraphs = []
for para in paragraphs:
if '```' in para:
parts = para.split('```')
for i, part in enumerate(parts):
if i % 2 == 1: # 这是代码块
parts[i] = f"\n```\n{part.strip()}\n```\n"
para = ''.join(parts)
else:
para = para.replace('. ', '.\n')
formatted_paragraphs.append(para.strip())
return '\n\n'.join(formatted_paragraphs)
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    """
    Handle POST /v1/chat/completions.

    Raises:
        HTTPException: with status 500 when either the local or the global
            search engine is unset (falsy).

    NOTE(review): only the initialization guard is visible in this excerpt;
    the rest of the handler body is elided.
    """
    # Both engines must be initialized before a request can be served.
    if not local_search_engine or not global_search_engine:
        logger.error("搜索引擎未初始化")
        raise HTTPException(status_code=500, detail="搜索引擎未初始化")
Do you need to file an issue?
Describe the issue
I copied the code from https://github.com/win4r/GraphRAG4OpenWebUI to my local machine. The project uses GraphRAG version 0.3.3, but I have the latest version, 1.2.0, installed. The code throws an error: cannot import name 'store_entity_semantic_embeddings' from 'graphrag.query.input.loaders.dfs'. How can I resolve this? Here is the code:
import os
import asyncio
import time
import uuid
import json
import re
import pandas as pd
import tiktoken
import logging
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any, Union
from contextlib import asynccontextmanager
from tavily import TavilyClient
# GraphRAG-related imports
from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
read_indexer_covariates,
read_indexer_entities,
read_indexer_relationships,
read_indexer_reports,
read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import store_entity_semantic_embeddings
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import LocalSearchMixedContext
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.query.structured_search.global_search.community_context import GlobalCommunityContext
from graphrag.query.structured_search.global_search.search import GlobalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Module-level logger named after this module. The pasted text read
# `getLogger(name)`, but no `name` is defined at this point; `__name__` is the
# module attribute that call needs.
logger = logging.getLogger(__name__)
# Constants and configuration

# Data directory, read from the INPUT_DIR environment variable.
# NOTE(review): os.getenv returns None when the variable is unset, in which
# case the f-string below yields "None/lancedb" — confirm it is always set.
INPUT_DIR = os.getenv('INPUT_DIR')
LANCEDB_URI = f"{INPUT_DIR}/lancedb"

# Table names. load_context reads "{INPUT_DIR}/{ENTITY_TABLE}.parquet" and
# "{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet"; presumably the remaining
# names are used the same way — their use is not shown in this excerpt.
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
TEXT_UNIT_TABLE = "create_final_text_units"

# Community level passed to read_indexer_entities in load_context.
COMMUNITY_LEVEL = 2
# Presumably the port the server listens on — its use is not shown here.
PORT = 8012
# Module-level globals holding the search engines and the question generator.
# They start as None; lifespan declares all three `global` and assigns the two
# search engines during startup.
local_search_engine = None
global_search_engine = None
question_generator = None
# Data models
class Message(BaseModel):
    """A chat message: a role plus its text content.

    Used as the element type of ChatCompletionRequest.messages and as
    ChatCompletionResponseChoice.message.
    """

    role: str
    content: str
class ChatCompletionRequest(BaseModel):
    """Request body accepted by the POST /v1/chat/completions handler.

    Only `model` and `messages` are required; every other field has a default.
    NOTE(review): the field names appear to mirror the OpenAI chat-completions
    request schema — confirm before relying on that.
    """

    model: str
    messages: List[Message]
    temperature: Optional[float] = 1.0
    top_p: Optional[float] = 1.0
    n: Optional[int] = 1
    stream: Optional[bool] = False
    # A single stop string or a list of them.
    stop: Optional[Union[str, List[str]]] = None
    max_tokens: Optional[int] = None
    presence_penalty: Optional[float] = 0
    frequency_penalty: Optional[float] = 0
    logit_bias: Optional[Dict[str, float]] = None
    user: Optional[str] = None
class ChatCompletionResponseChoice(BaseModel):
    """One entry of ChatCompletionResponse.choices."""

    index: int
    message: Message
    finish_reason: Optional[str] = None
class Usage(BaseModel):
    """Token counts reported in ChatCompletionResponse.usage."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
class ChatCompletionResponse(BaseModel):
    """Response model for a chat completion.

    `id` and `created` are filled in by default factories when not supplied;
    `model`, `choices` and `usage` are required.
    """

    # Defaults to "chatcmpl-" followed by a fresh UUID4 in hex form.
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex}")
    object: str = "chat.completion"
    # Defaults to the current Unix time, truncated to whole seconds.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseChoice]
    usage: Usage
    system_fingerprint: Optional[str] = None
async def setup_llm_and_embedder():
    """
    Set up the language model (LLM) and the embedding model.

    NOTE(review): only the log call is visible in this excerpt; the rest of
    the body is elided. lifespan unpacks the awaited result into
    (llm, token_encoder, text_embedder).
    """
    logger.info("正在设置LLM和嵌入器")
async def load_context():
"""
加载上下文数据,包括实体、关系、报告、文本单元和协变量
"""
logger.info("正在加载上下文数据")
try:
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
async def setup_search_engines(llm, token_encoder, text_embedder, entities, relationships, reports, text_units,
                               description_embedding_store, covariates):
    """
    Set up the local search engine and the global search engine.

    NOTE(review): only the log call is visible in this excerpt; the rest of
    the body is elided. lifespan unpacks the awaited result into
    (local_search_engine, global_search_engine, local_context_builder,
    local_llm_params, local_context_params).
    """
    logger.info("正在设置搜索引擎")
def format_response(response):
"""
格式化响应,添加适当的换行和段落分隔。
"""
paragraphs = re.split(r'\n{2,}', response)
async def tavily_search(prompt: str):
"""
使用Tavily API进行搜索
"""
try:
client = TavilyClient(api_key=os.environ['TAVILY_API_KEY'])
resp = client.search(prompt, search_depth="advanced")
@asynccontextmanager
async def lifespan(app: FastAPI):
# 启动时执行
global local_search_engine, global_search_engine, question_generator
try:
logger.info("正在初始化搜索引擎和问题生成器...")
llm, token_encoder, text_embedder = await setup_llm_and_embedder()
entities, relationships, reports, text_units, description_embedding_store, covariates = await load_context()
local_search_engine, global_search_engine, local_context_builder, local_llm_params, local_context_params = await setup_search_engines(
llm, token_encoder, text_embedder, entities, relationships, reports, text_units,
description_embedding_store, covariates
)
app = FastAPI(lifespan=lifespan)
# Add the following code to the chat_completions function
async def full_model_search(prompt: str):
    """
    Run a full-model search: local retrieval, global retrieval and Tavily search.

    The three searches are awaited one after another with the same prompt.

    NOTE(review): the excerpt ends after the three calls; how the results are
    combined and returned is not shown.
    """
    local_result = await local_search_engine.asearch(prompt)
    global_result = await global_search_engine.asearch(prompt)
    tavily_result = await tavily_search(prompt)
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    """
    Handle POST /v1/chat/completions.

    Raises:
        HTTPException: with status 500 when either the local or the global
            search engine is unset (falsy).

    NOTE(review): only the initialization guard is visible in this excerpt;
    the rest of the handler body is elided.
    """
    # Both engines must be initialized before a request can be served.
    if not local_search_engine or not global_search_engine:
        logger.error("搜索引擎未初始化")
        raise HTTPException(status_code=500, detail="搜索引擎未初始化")
@app.get("/v1/models")
async def list_models():
"""
返回可用模型列表
"""
logger.info("收到模型列表请求")
current_time = int(time.time())
models = [
{"id": "graphrag-local-search:latest", "object": "model", "created": current_time - 100000, "owned_by": "graphrag"},
{"id": "graphrag-global-search:latest", "object": "model", "created": current_time - 95000, "owned_by": "graphrag"},
# {"id": "graphrag-question-generator:latest", "object": "model", "created": current_time - 90000, "owned_by": "graphrag"},
# {"id": "gpt-3.5-turbo:latest", "object": "model", "created": current_time - 80000, "owned_by": "openai"},
# {"id": "text-embedding-3-small:latest", "object": "model", "created": current_time - 70000, "owned_by": "openai"},
{"id": "tavily-search:latest", "object": "model", "created": current_time - 85000, "owned_by": "tavily"},
{"id": "full-model:latest", "object": "model", "created": current_time - 80000, "owned_by": "combined"}
# Script entry-point guard. The pasted text read `if name == "main":`, which
# refers to an undefined `name`; the double underscores are restored here.
# NOTE(review): the excerpt ends after the import — the server start call is
# not shown.
if __name__ == "__main__":
    import uvicorn
Steps to reproduce
No response
GraphRAG Config Used
# Paste your config here
Logs and screenshots
No response
Additional Information
The text was updated successfully, but these errors were encountered: