Skip to content

Commit

Permalink
VespaSync Pool Size
Browse files Browse the repository at this point in the history
  • Loading branch information
docuracy committed Jan 25, 2025
1 parent 81aeda0 commit 580b03d
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 4 deletions.
3 changes: 3 additions & 0 deletions vespa/repository/api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class VespaSyncExtended(VespaSync):
"""
A subclass of VespaSync that adds the methods from VespaExtended.
"""
# Initialise the sync client with a default connection-pool size of 20.
# `app` and any additional keyword arguments are forwarded unchanged to the
# parent initialiser; callers can still override `pool_maxsize` explicitly.
def __init__(self, app, pool_maxsize=20, **kwargs):
# Default the pool size to 20 rather than relying on the parent's own default
# (see https://pyvespa.readthedocs.io/en/stable/reference-api.html#vespasync).
# NOTE(review): presumably sized to match the number of concurrent feed workers — confirm.
super().__init__(app, pool_maxsize=pool_maxsize, **kwargs)

# Thin pass-through: calls `self.app.get_existing` with the same `data_id`,
# `namespace` and `schema` keyword arguments and returns its result.
# NOTE(review): all three parameters default to None although annotated as
# `str`; confirm that the wrapped method accepts None for each of them.
def get_existing(self, data_id: str = None, namespace: str = None, schema: str = None) -> dict:
return self.app.get_existing(data_id=data_id, namespace=namespace, schema=schema)
Expand Down
4 changes: 2 additions & 2 deletions vespa/repository/api/ingestion/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def update_existing_place(task):
schema=schema,
data_id=document_id,
fields={
"names": existing_names + transformed_document['fields']['names']
"names": list(set(existing_names + transformed_document['fields']['names']))
}
)
# logger.info(f"Update response: {response.get('status_code')}: {response}")
Expand Down Expand Up @@ -163,7 +163,7 @@ def feed_document(sync_app, namespace, schema, transformed_document, task_id, co
schema=schema,
data_id=existing_toponym_id,
fields={
"places": existing_places + [document_id]
"places": list(set(existing_places + [document_id]))
}
)
else:
Expand Down
5 changes: 3 additions & 2 deletions vespa/repository/api/ingestion/triples.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,9 @@ def feed_triple(task):
schema=schema,
)
if preexisting and schema == "place":
document["fields"]["types"] = preexisting.get("fields").get("types", []) + document.get(
"fields").get("types", [])
document["fields"]["types"] = list(
set(preexisting.get("fields", {}).get("types", []) + document.get("fields", {}).get("types",
[])))

# logger.info(f"Updating {schema} {preexisting} with {document}")

Expand Down

0 comments on commit 580b03d

Please sign in to comment.