Skip to content

Commit

Permalink
Add descriptive stats to missing tasks and add number of qrels (#1476)
Browse files Browse the repository at this point in the history
* add code for computing number of qrels

* add stats fever hotpotqa msmarco topiocqa

* miracl mrtidy

* multilongdoc miracl reranking

* add multi eurlex

* fix tests for descriptive stats

* fix tests

---------

Co-authored-by: Roman Solomatin <[email protected]>
  • Loading branch information
imenelydiaker and Samoed authored Nov 21, 2024
1 parent 0df0210 commit 0abe1a0
Show file tree
Hide file tree
Showing 14 changed files with 6,279 additions and 9 deletions.
3 changes: 3 additions & 0 deletions mteb/abstasks/AbsTaskRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics):
num_samples: int
num_queries: int
num_documents: int
num_relevant_docs: int
number_of_characters: int

min_document_length: int
Expand Down Expand Up @@ -419,6 +420,7 @@ def _calculate_metrics_from_split(
query_len, doc_len = calculate_length(queries, corpus)
num_documents = len(corpus)
num_queries = len(queries)
num_relevant_docs = sum(len(relevant_docs[qid]) for qid in relevant_docs)
none_queries = sum(q is None or len(q) == 0 for q in queries.values())

# create a list of number of relevant docs per query
Expand Down Expand Up @@ -466,6 +468,7 @@ def _calculate_metrics_from_split(
num_samples=num_documents + num_queries,
num_queries=num_queries,
num_documents=num_documents,
num_relevant_docs=num_relevant_docs,
min_document_length=min(doc_len),
average_document_length=sum(doc_len) / num_documents,
max_document_length=max(doc_len),
Expand Down
Loading

0 comments on commit 0abe1a0

Please sign in to comment.