Skip to content

Commit

Permalink
minor fixes (#1514)
Browse files: browse the repository at this point in the history
  • Loading branch information
shreyaspimpalgaonkar authored Oct 29, 2024
1 parent ebb4c6f commit 0480d2e
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 8 deletions.
4 changes: 2 additions & 2 deletions py/core/pipelines/search_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ async def enqueue_requests():
await enqueue_task

vector_search_results = (
await vector_search_task if use_vector_search else None
await vector_search_task if use_vector_search else []
)
kg_results = await kg_task if do_kg else None
kg_results = await kg_task if do_kg else []

return AggregateSearchResult(
vector_search_results=vector_search_results,
Expand Down
31 changes: 26 additions & 5 deletions py/core/pipes/kg/community_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,13 @@ async def process_community(
break
except Exception as e:
if attempt == 2:
raise ValueError(
f"Failed to generate a summary for community {community_number} at level {community_level}."
) from e
logger.error(
f"KGCommunitySummaryPipe: Error generating community summary for community {community_number}: {e}"
)
return {
"community_number": community_number,
"error": str(e),
}

community_report = CommunityReport(
community_number=community_number,
Expand Down Expand Up @@ -267,11 +271,28 @@ async def _run_logic( # type: ignore
)
)

total_jobs = len(community_summary_jobs)
total_errors = 0
completed_community_summary_jobs = 0
for community_summary in asyncio.as_completed(community_summary_jobs):

summary = await community_summary
completed_community_summary_jobs += 1
if completed_community_summary_jobs % 50 == 0:
logger.info(
f"KGCommunitySummaryPipe: {completed_community_summary_jobs}/{len(community_summary_jobs)} community summaries completed, elapsed time: {time.time() - start_time:.2f} seconds"
f"KGCommunitySummaryPipe: {completed_community_summary_jobs}/{total_jobs} community summaries completed, elapsed time: {time.time() - start_time:.2f} seconds"
)

if "error" in summary:
logger.error(
f"KGCommunitySummaryPipe: Error generating community summary for community {summary['community_number']}: {summary['error']}"
)
yield await community_summary
total_errors += 1
continue

yield summary

if total_errors > 0:
raise ValueError(
f"KGCommunitySummaryPipe: Failed to generate community summaries for {total_errors} out of {total_jobs} communities. Please rerun the job if there are too many failures."
)
5 changes: 4 additions & 1 deletion py/core/pipes/kg/entity_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ async def process_entity(
.message.content
)

# will do more requests, but it is simpler
if not out_entity.description:
logger.error(f"No description for entity {out_entity.name}")
return out_entity.name

out_entity.description_embedding = (
await self.embedding_provider.async_get_embeddings(
[out_entity.description]
Expand Down

0 comments on commit 0480d2e

Please sign in to comment.