Skip to content

Commit

Permalink
fix: duplicate reference
Browse files (browse the repository at this point in the history)
  • Loading branch information
milovate committed Jan 8, 2025
1 parent c38c952 commit 14cc26b
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions daras_ai_v2/vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ def get_top_k_references(

# merge duplicate references
uniques: dict[str, SearchReference] = {}
for ref in references:
key = ref["url"]
for ref_key, ref in references:
key = ref_key
try:
existing = uniques[key]
except KeyError:
Expand All @@ -217,13 +217,20 @@ def vespa_search_results_to_refs(
for hit in search_result["root"].get("children", []):
try:
ref = EmbeddingsReference.objects.get(vespa_doc_id=hit["fields"]["id"])
ref_key = ref.url
except EmbeddingsReference.DoesNotExist:
continue
if "text/html" in ref.embedded_file.metadata.mime_type:
# logger.debug(f"Generating fragments {ref['url']} as it is an HTML file")
ref.url = generate_text_fragment_url(url=ref.url, text=ref.snippet)
yield SearchReference(
url=ref.url, title=ref.title, snippet=ref.snippet, score=hit["relevance"]
yield (
ref_key,
SearchReference(
url=ref.url,
title=ref.title,
snippet=ref.snippet,
score=hit["relevance"],
),
)


Expand Down

0 comments on commit 14cc26b

Please sign in to comment.